Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.141
      1 /*	$NetBSD: linux_machdep.c,v 1.141 2008/10/25 23:38:28 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000, 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.141 2008/10/25 23:38:28 christos Exp $");
     34 
     35 #if defined(_KERNEL_OPT)
     36 #include "opt_vm86.h"
     37 #include "opt_user_ldt.h"
     38 #endif
     39 
     40 #include <sys/param.h>
     41 #include <sys/systm.h>
     42 #include <sys/signalvar.h>
     43 #include <sys/kernel.h>
     44 #include <sys/proc.h>
     45 #include <sys/user.h>
     46 #include <sys/buf.h>
     47 #include <sys/reboot.h>
     48 #include <sys/conf.h>
     49 #include <sys/exec.h>
     50 #include <sys/file.h>
     51 #include <sys/callout.h>
     52 #include <sys/malloc.h>
     53 #include <sys/mbuf.h>
     54 #include <sys/msgbuf.h>
     55 #include <sys/mount.h>
     56 #include <sys/vnode.h>
     57 #include <sys/device.h>
     58 #include <sys/syscallargs.h>
     59 #include <sys/filedesc.h>
     60 #include <sys/exec_elf.h>
     61 #include <sys/disklabel.h>
     62 #include <sys/ioctl.h>
     63 #include <sys/wait.h>
     64 #include <sys/kauth.h>
     65 
     66 #include <miscfs/specfs/specdev.h>
     67 
     68 #include <compat/linux/common/linux_types.h>
     69 #include <compat/linux/common/linux_signal.h>
     70 #include <compat/linux/common/linux_util.h>
     71 #include <compat/linux/common/linux_ioctl.h>
     72 #include <compat/linux/common/linux_hdio.h>
     73 #include <compat/linux/common/linux_exec.h>
     74 #include <compat/linux/common/linux_machdep.h>
     75 #include <compat/linux/common/linux_errno.h>
     76 
     77 #include <compat/linux/linux_syscallargs.h>
     78 
     79 #include <sys/cpu.h>
     80 #include <machine/cpufunc.h>
     81 #include <machine/psl.h>
     82 #include <machine/reg.h>
     83 #include <machine/segments.h>
     84 #include <machine/specialreg.h>
     85 #include <machine/sysarch.h>
     86 #include <machine/vm86.h>
     87 #include <machine/vmparam.h>
     88 
     89 /*
     90  * To see whether wscons is configured (for virtual console ioctl calls).
     91  */
     92 #if defined(_KERNEL_OPT)
     93 #include "wsdisplay.h"
     94 #endif
     95 #if (NWSDISPLAY > 0)
     96 #include <dev/wscons/wsconsio.h>
     97 #include <dev/wscons/wsdisplay_usl_io.h>
     98 #if defined(_KERNEL_OPT)
     99 #include "opt_xserver.h"
    100 #endif
    101 #endif
    102 
    103 #ifdef DEBUG_LINUX
    104 #define DPRINTF(a) uprintf a
    105 #else
    106 #define DPRINTF(a)
    107 #endif
    108 
    109 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
    110 extern struct disklist *x86_alldisks;
    111 static void linux_save_ucontext(struct lwp *, struct trapframe *,
    112     const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
    113 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
    114     const sigset_t *, struct linux_sigcontext *);
    115 static int linux_restore_sigcontext(struct lwp *,
    116     struct linux_sigcontext *, register_t *);
    117 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
    118 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
    119 
    120 extern char linux_sigcode[], linux_rt_sigcode[];
    121 /*
    122  * Deal with some i386-specific things in the Linux emulation code.
    123  */
    124 
    125 void
    126 linux_setregs(struct lwp *l, struct exec_package *epp, u_long stack)
    127 {
    128 	struct pcb *pcb = &l->l_addr->u_pcb;
    129 	struct trapframe *tf;
    130 
    131 #if NNPX > 0
    132 	/* If we were using the FPU, forget about it. */
    133 	if (npxproc == l)
    134 		npxdrop();
    135 #endif
    136 
    137 #ifdef USER_LDT
    138 	pmap_ldt_cleanup(l);
    139 #endif
    140 
    141 	l->l_md.md_flags &= ~MDL_USEDFPU;
    142 
    143 	if (i386_use_fxsave) {
    144 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    145 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    146 	} else
    147 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    148 
    149 	tf = l->l_md.md_regs;
    150 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    151 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    152 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    153 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    154 	tf->tf_edi = 0;
    155 	tf->tf_esi = 0;
    156 	tf->tf_ebp = 0;
    157 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    158 	tf->tf_edx = 0;
    159 	tf->tf_ecx = 0;
    160 	tf->tf_eax = 0;
    161 	tf->tf_eip = epp->ep_entry;
    162 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    163 	tf->tf_eflags = PSL_USERSET;
    164 	tf->tf_esp = stack;
    165 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    166 }
    167 
    168 /*
    169  * Send an interrupt to process.
    170  *
    171  * Stack is set up to allow sigcode stored
    172  * in u. to call routine, followed by kcall
    173  * to sigreturn routine below.  After sigreturn
    174  * resets the signal mask, the stack, and the
    175  * frame pointer, it returns to the user
    176  * specified pc, psl.
    177  */
    178 
    179 void
    180 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    181 {
    182 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    183 		linux_rt_sendsig(ksi, mask);
    184 	else
    185 		linux_old_sendsig(ksi, mask);
    186 }
    187 
    188 
    189 static void
    190 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
    191 {
    192 	uc->uc_flags = 0;
    193 	uc->uc_link = NULL;
    194 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    195 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    196 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    197 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    198 }
    199 
    200 static void
    201 linux_save_sigcontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct linux_sigcontext *sc)
    202 {
    203 	/* Save register context. */
    204 #ifdef VM86
    205 	if (tf->tf_eflags & PSL_VM) {
    206 		sc->sc_gs = tf->tf_vm86_gs;
    207 		sc->sc_fs = tf->tf_vm86_fs;
    208 		sc->sc_es = tf->tf_vm86_es;
    209 		sc->sc_ds = tf->tf_vm86_ds;
    210 		sc->sc_eflags = get_vflags(l);
    211 	} else
    212 #endif
    213 	{
    214 		sc->sc_gs = tf->tf_gs;
    215 		sc->sc_fs = tf->tf_fs;
    216 		sc->sc_es = tf->tf_es;
    217 		sc->sc_ds = tf->tf_ds;
    218 		sc->sc_eflags = tf->tf_eflags;
    219 	}
    220 	sc->sc_edi = tf->tf_edi;
    221 	sc->sc_esi = tf->tf_esi;
    222 	sc->sc_esp = tf->tf_esp;
    223 	sc->sc_ebp = tf->tf_ebp;
    224 	sc->sc_ebx = tf->tf_ebx;
    225 	sc->sc_edx = tf->tf_edx;
    226 	sc->sc_ecx = tf->tf_ecx;
    227 	sc->sc_eax = tf->tf_eax;
    228 	sc->sc_eip = tf->tf_eip;
    229 	sc->sc_cs = tf->tf_cs;
    230 	sc->sc_esp_at_signal = tf->tf_esp;
    231 	sc->sc_ss = tf->tf_ss;
    232 	sc->sc_err = tf->tf_err;
    233 	sc->sc_trapno = tf->tf_trapno;
    234 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    235 	sc->sc_387 = NULL;
    236 
    237 	/* Save signal stack. */
    238 	/* Linux doesn't save the onstack flag in sigframe */
    239 
    240 	/* Save signal mask. */
    241 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    242 }
    243 
    244 static void
    245 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    246 {
    247 	struct lwp *l = curlwp;
    248 	struct proc *p = l->l_proc;
    249 	struct trapframe *tf;
    250 	struct linux_rt_sigframe *fp, frame;
    251 	int onstack, error;
    252 	linux_siginfo_t *lsi;
    253 	int sig = ksi->ksi_signo;
    254 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    255 	struct sigaltstack *sas = &l->l_sigstk;
    256 
    257 	tf = l->l_md.md_regs;
    258 	/* Do we need to jump onto the signal stack? */
    259 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    260 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    261 
    262 
    263 	/* Allocate space for the signal handler context. */
    264 	if (onstack)
    265 		fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
    266 		    sas->ss_size);
    267 	else
    268 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    269 	fp--;
    270 
    271 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    272 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    273 
    274 	/* Build stack frame for signal trampoline. */
    275 	frame.sf_handler = catcher;
    276 	frame.sf_sig = native_to_linux_signo[sig];
    277 	frame.sf_sip = &fp->sf_si;
    278 	frame.sf_ucp = &fp->sf_uc;
    279 
    280 	/*
    281 	 * XXX: the following code assumes that the constants for
    282 	 * siginfo are the same between linux and NetBSD.
    283 	 */
    284 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    285 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    286 	lsi->lsi_code = ksi->ksi_code;
    287 	switch (lsi->lsi_signo = frame.sf_sig) {
    288 	case LINUX_SIGILL:
    289 	case LINUX_SIGFPE:
    290 	case LINUX_SIGSEGV:
    291 	case LINUX_SIGBUS:
    292 	case LINUX_SIGTRAP:
    293 		lsi->lsi_addr = ksi->ksi_addr;
    294 		break;
    295 	case LINUX_SIGCHLD:
    296 		lsi->lsi_uid = ksi->ksi_uid;
    297 		lsi->lsi_pid = ksi->ksi_pid;
    298 		lsi->lsi_utime = ksi->ksi_utime;
    299 		lsi->lsi_stime = ksi->ksi_stime;
    300 		/* XXX is that right? */
    301 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
    302 		break;
    303 	case LINUX_SIGIO:
    304 		lsi->lsi_band = ksi->ksi_band;
    305 		lsi->lsi_fd = ksi->ksi_fd;
    306 		break;
    307 	default:
    308 		lsi->lsi_uid = ksi->ksi_uid;
    309 		lsi->lsi_pid = ksi->ksi_pid;
    310 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    311 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    312 			lsi->lsi_value.sival_ptr = ksi->ksi_value.sival_ptr;
    313 		break;
    314 	}
    315 
    316 	/* Save register context. */
    317 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    318 	sendsig_reset(l, sig);
    319 
    320 	mutex_exit(p->p_lock);
    321 	error = copyout(&frame, fp, sizeof(frame));
    322 	mutex_enter(p->p_lock);
    323 
    324 	if (error != 0) {
    325 		/*
    326 		 * Process has trashed its stack; give it an illegal
    327 		 * instruction to halt it in its tracks.
    328 		 */
    329 		sigexit(l, SIGILL);
    330 		/* NOTREACHED */
    331 	}
    332 
    333 	/*
    334 	 * Build context to run handler in.
    335 	 */
    336 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    337 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    338 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    339 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    340 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    341 	    (linux_rt_sigcode - linux_sigcode);
    342 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    343 	tf->tf_eflags &= ~PSL_CLEARSIG;
    344 	tf->tf_esp = (int)fp;
    345 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    346 
    347 	/* Remember that we're now on the signal stack. */
    348 	if (onstack)
    349 		sas->ss_flags |= SS_ONSTACK;
    350 }
    351 
    352 static void
    353 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    354 {
    355 	struct lwp *l = curlwp;
    356 	struct proc *p = l->l_proc;
    357 	struct trapframe *tf;
    358 	struct linux_sigframe *fp, frame;
    359 	int onstack, error;
    360 	int sig = ksi->ksi_signo;
    361 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    362 	struct sigaltstack *sas = &l->l_sigstk;
    363 
    364 	tf = l->l_md.md_regs;
    365 
    366 	/* Do we need to jump onto the signal stack? */
    367 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    368 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    369 
    370 	/* Allocate space for the signal handler context. */
    371 	if (onstack)
    372 		fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
    373 		    sas->ss_size);
    374 	else
    375 		fp = (struct linux_sigframe *)tf->tf_esp;
    376 	fp--;
    377 
    378 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    379 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    380 
    381 	/* Build stack frame for signal trampoline. */
    382 	frame.sf_handler = catcher;
    383 	frame.sf_sig = native_to_linux_signo[sig];
    384 
    385 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    386 	sendsig_reset(l, sig);
    387 
    388 	mutex_exit(p->p_lock);
    389 	error = copyout(&frame, fp, sizeof(frame));
    390 	mutex_enter(p->p_lock);
    391 
    392 	if (error != 0) {
    393 		/*
    394 		 * Process has trashed its stack; give it an illegal
    395 		 * instruction to halt it in its tracks.
    396 		 */
    397 		sigexit(l, SIGILL);
    398 		/* NOTREACHED */
    399 	}
    400 
    401 	/*
    402 	 * Build context to run handler in.
    403 	 */
    404 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    405 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    406 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    407 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    408 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    409 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    410 	tf->tf_eflags &= ~PSL_CLEARSIG;
    411 	tf->tf_esp = (int)fp;
    412 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    413 
    414 	/* Remember that we're now on the signal stack. */
    415 	if (onstack)
    416 		sas->ss_flags |= SS_ONSTACK;
    417 }
    418 
    419 /*
    420  * System call to cleanup state after a signal
    421  * has been taken.  Reset signal mask and
    422  * stack state from context left by sendsig (above).
    423  * Return to previous pc and psl as specified by
    424  * context left by sendsig. Check carefully to
    425  * make sure that the user has not modified the
    426  * psl to gain improper privileges or to cause
    427  * a machine fault.
    428  */
    429 int
    430 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
    431 {
    432 	/* {
    433 		syscallarg(struct linux_ucontext *) ucp;
    434 	} */
    435 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    436 	int error;
    437 
    438 	/*
    439 	 * The trampoline code hands us the context.
    440 	 * It is unsafe to keep track of it ourselves, in the event that a
    441 	 * program jumps out of a signal handler.
    442 	 */
    443 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    444 		return error;
    445 
    446 	/* XXX XAX we can do better here by using more of the ucontext */
    447 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    448 }
    449 
    450 int
    451 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
    452 {
    453 	/* {
    454 		syscallarg(struct linux_sigcontext *) scp;
    455 	} */
    456 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    457 	int error;
    458 
    459 	/*
    460 	 * The trampoline code hands us the context.
    461 	 * It is unsafe to keep track of it ourselves, in the event that a
    462 	 * program jumps out of a signal handler.
    463 	 */
    464 	if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
    465 		return error;
    466 	return linux_restore_sigcontext(l, &context, retval);
    467 }
    468 
    469 static int
    470 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    471     register_t *retval)
    472 {
    473 	struct proc *p = l->l_proc;
    474 	struct sigaltstack *sas = &l->l_sigstk;
    475 	struct trapframe *tf;
    476 	sigset_t mask;
    477 	ssize_t ss_gap;
    478 	/* Restore register context. */
    479 	tf = l->l_md.md_regs;
    480 
    481 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    482 #ifdef VM86
    483 	if (scp->sc_eflags & PSL_VM) {
    484 		void syscall_vm86(struct trapframe *);
    485 
    486 		tf->tf_vm86_gs = scp->sc_gs;
    487 		tf->tf_vm86_fs = scp->sc_fs;
    488 		tf->tf_vm86_es = scp->sc_es;
    489 		tf->tf_vm86_ds = scp->sc_ds;
    490 		set_vflags(l, scp->sc_eflags);
    491 		p->p_md.md_syscall = syscall_vm86;
    492 	} else
    493 #endif
    494 	{
    495 		/*
    496 		 * Check for security violations.  If we're returning to
    497 		 * protected mode, the CPU will validate the segment registers
    498 		 * automatically and generate a trap on violations.  We handle
    499 		 * the trap, rather than doing all of the checking here.
    500 		 */
    501 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    502 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    503 			return EINVAL;
    504 
    505 		tf->tf_gs = scp->sc_gs;
    506 		tf->tf_fs = scp->sc_fs;
    507 		tf->tf_es = scp->sc_es;
    508 		tf->tf_ds = scp->sc_ds;
    509 #ifdef VM86
    510 		if (tf->tf_eflags & PSL_VM)
    511 			(*p->p_emul->e_syscall_intern)(p);
    512 #endif
    513 		tf->tf_eflags = scp->sc_eflags;
    514 	}
    515 	tf->tf_edi = scp->sc_edi;
    516 	tf->tf_esi = scp->sc_esi;
    517 	tf->tf_ebp = scp->sc_ebp;
    518 	tf->tf_ebx = scp->sc_ebx;
    519 	tf->tf_edx = scp->sc_edx;
    520 	tf->tf_ecx = scp->sc_ecx;
    521 	tf->tf_eax = scp->sc_eax;
    522 	tf->tf_eip = scp->sc_eip;
    523 	tf->tf_cs = scp->sc_cs;
    524 	tf->tf_esp = scp->sc_esp_at_signal;
    525 	tf->tf_ss = scp->sc_ss;
    526 
    527 	/* Restore signal stack. */
    528 	/*
    529 	 * Linux really does it this way; it doesn't have space in sigframe
    530 	 * to save the onstack flag.
    531 	 */
    532 	mutex_enter(p->p_lock);
    533 	ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
    534 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    535 		sas->ss_flags |= SS_ONSTACK;
    536 	else
    537 		sas->ss_flags &= ~SS_ONSTACK;
    538 
    539 	/* Restore signal mask. */
    540 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    541 	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
    542 	mutex_exit(p->p_lock);
    543 
    544 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    545 	return EJUSTRETURN;
    546 }
    547 
    548 #ifdef USER_LDT
    549 
    550 static int
    551 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
    552     register_t *retval)
    553 {
    554 	struct x86_get_ldt_args gl;
    555 	int error;
    556 	int num_ldt;
    557 	union descriptor *ldt_buf;
    558 
    559 	/*
    560 	 * I've checked the linux code - this function is asymetric with
    561 	 * linux_write_ldt, and returns raw ldt entries.
    562 	 * NB, the code I saw zerod the spare parts of the user buffer.
    563 	 */
    564 
    565 	DPRINTF(("linux_read_ldt!"));
    566 
    567 	num_ldt = x86_get_ldt_len(l);
    568 	if (num_ldt <= 0)
    569 		return EINVAL;
    570 
    571 	gl.start = 0;
    572 	gl.desc = NULL;
    573 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    574 
    575 	if (gl.num > num_ldt)
    576 		gl.num = num_ldt;
    577 
    578 	ldt_buf = malloc(gl.num * sizeof *ldt, M_TEMP, M_WAITOK);
    579 
    580 	error = x86_get_ldt1(l, &gl, ldt_buf);
    581 	/* NB gl.num might have changed */
    582 	if (error == 0) {
    583 		*retval = gl.num * sizeof *ldt;
    584 		error = copyout(ldt_buf, SCARG(uap, ptr),
    585 		    gl.num * sizeof *ldt_buf);
    586 	}
    587 	free(ldt_buf, M_TEMP);
    588 
    589 	return error;
    590 }
    591 
/*
 * Argument block that linux_write_ldt() copies in from user space.
 * The field comments describe how linux_write_ldt() uses each member.
 */
struct linux_ldt_info {
	unsigned int entry_number;		/* LDT slot to write */
	unsigned long base_addr;		/* segment base */
	unsigned int limit;			/* segment limit */
	unsigned int seg_32bit:1;		/* copied to sd_def32 */
	unsigned int contents:2;		/* bits 2-3 of sd_type */
	unsigned int read_exec_only:1;		/* inverted into bit 1 of sd_type */
	unsigned int limit_in_pages:1;		/* copied to sd_gran */
	unsigned int seg_not_present:1;		/* inverted into sd_p */
	unsigned int useable:1;			/* copied to sd_xx (new mode only) */
};
    603 
    604 static int
    605 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
    606     int oldmode)
    607 {
    608 	struct linux_ldt_info ldt_info;
    609 	union descriptor d;
    610 	struct x86_set_ldt_args sl;
    611 	int error;
    612 
    613 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    614 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    615 		return (EINVAL);
    616 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    617 		return error;
    618 	if (ldt_info.entry_number >= 8192)
    619 		return (EINVAL);
    620 	if (ldt_info.contents == 3) {
    621 		if (oldmode)
    622 			return (EINVAL);
    623 		if (ldt_info.seg_not_present)
    624 			return (EINVAL);
    625 	}
    626 
    627 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    628 	    (oldmode || (ldt_info.contents == 0 &&
    629 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    630 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    631 	    ldt_info.useable == 0))) {
    632 		/* this means you should zero the ldt */
    633 		(void)memset(&d, 0, sizeof(d));
    634 	} else {
    635 		d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    636 		d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    637 		d.sd.sd_lolimit = ldt_info.limit & 0xffff;
    638 		d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    639 		d.sd.sd_type = 16 | (ldt_info.contents << 2) |
    640 		    (!ldt_info.read_exec_only << 1);
    641 		d.sd.sd_dpl = SEL_UPL;
    642 		d.sd.sd_p = !ldt_info.seg_not_present;
    643 		d.sd.sd_def32 = ldt_info.seg_32bit;
    644 		d.sd.sd_gran = ldt_info.limit_in_pages;
    645 		if (!oldmode)
    646 			d.sd.sd_xx = ldt_info.useable;
    647 		else
    648 			d.sd.sd_xx = 0;
    649 	}
    650 	sl.start = ldt_info.entry_number;
    651 	sl.desc = NULL;;
    652 	sl.num = 1;
    653 
    654 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    655 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    656 
    657 	return x86_set_ldt1(l, &sl, &d);
    658 }
    659 
    660 #endif /* USER_LDT */
    661 
    662 int
    663 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
    664 {
    665 	/* {
    666 		syscallarg(int) func;
    667 		syscallarg(void *) ptr;
    668 		syscallarg(size_t) bytecount;
    669 	} */
    670 
    671 	switch (SCARG(uap, func)) {
    672 #ifdef USER_LDT
    673 	case 0:
    674 		return linux_read_ldt(l, (const void *)uap, retval);
    675 	case 1:
    676 		return linux_write_ldt(l, (const void *)uap, 1);
    677 	case 2:
    678 #ifdef notyet
    679 		return (linux_read_default_ldt(l, (const void *)uap, retval);
    680 #else
    681 		return (ENOSYS);
    682 #endif
    683 	case 0x11:
    684 		return linux_write_ldt(l, (const void *)uap, 0);
    685 #endif /* USER_LDT */
    686 
    687 	default:
    688 		return (ENOSYS);
    689 	}
    690 }
    691 
    692 /*
    693  * XXX Pathetic hack to make svgalib work. This will fake the major
    694  * device number of an opened VT so that svgalib likes it. grmbl.
    695  * Should probably do it 'wrong the right way' and use a mapping
    696  * array for all major device numbers, and map linux_mknod too.
    697  */
    698 dev_t
    699 linux_fakedev(dev_t dev, int raw)
    700 {
    701 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    702 	const struct cdevsw *cd = cdevsw_lookup(dev);
    703 
    704 	if (raw) {
    705 #if (NWSDISPLAY > 0)
    706 		extern const struct cdevsw wsdisplay_cdevsw;
    707 		if (cd == &wsdisplay_cdevsw)
    708 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    709 #endif
    710 	}
    711 
    712 	if (cd == &ptc_cdevsw)
    713 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    714 	if (cd == &pts_cdevsw)
    715 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    716 
    717 	return dev;
    718 }
    719 
    720 #if (NWSDISPLAY > 0)
    721 /*
    722  * That's not complete, but enough to get an X server running.
    723  */
    724 #define NR_KEYS 128
    725 static const u_short plain_map[NR_KEYS] = {
    726 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
    727 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
    728 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    729 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
    730 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
    731 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
    732 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
    733 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    734 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
    735 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    736 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
    737 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    738 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    739 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    740 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    741 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    742 }, shift_map[NR_KEYS] = {
    743 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
    744 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
    745 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
    746 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
    747 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
    748 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
    749 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
    750 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
    751 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
    752 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    753 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
    754 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    755 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    756 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
    757 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    758 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    759 }, altgr_map[NR_KEYS] = {
    760 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
    761 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
    762 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    763 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
    764 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
    765 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
    766 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
    767 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
    768 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
    769 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
    770 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
    771 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    772 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    773 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    774 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    775 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    776 }, ctrl_map[NR_KEYS] = {
    777 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
    778 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
    779 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
    780 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
    781 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
    782 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
    783 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
    784 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    785 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
    786 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    787 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
    788 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    789 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    790 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    791 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    792 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    793 };
    794 
    795 const u_short * const linux_keytabs[] = {
    796 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
    797 };
    798 #endif
    799 
    800 static struct biosdisk_info *
    801 fd2biosinfo(struct proc *p, struct file *fp)
    802 {
    803 	struct vnode *vp;
    804 	const char *blkname;
    805 	char diskname[16];
    806 	int i;
    807 	struct nativedisk_info *nip;
    808 	struct disklist *dl = x86_alldisks;
    809 
    810 	if (fp->f_type != DTYPE_VNODE)
    811 		return NULL;
    812 	vp = (struct vnode *)fp->f_data;
    813 
    814 	if (vp->v_type != VBLK)
    815 		return NULL;
    816 
    817 	blkname = devsw_blk2name(major(vp->v_rdev));
    818 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    819 	    DISKUNIT(vp->v_rdev));
    820 
    821 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    822 		nip = &dl->dl_nativedisks[i];
    823 		if (strcmp(diskname, nip->ni_devname))
    824 			continue;
    825 		if (nip->ni_nmatches != 0)
    826 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    827 	}
    828 
    829 	return NULL;
    830 }
    831 
    832 
    833 /*
    834  * We come here in a last attempt to satisfy a Linux ioctl() call
    835  */
    836 int
    837 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
    838 {
    839 	/* {
    840 		syscallarg(int) fd;
    841 		syscallarg(u_long) com;
    842 		syscallarg(void *) data;
    843 	} */
    844 	struct sys_ioctl_args bia;
    845 	u_long com;
    846 	int error, error1;
    847 #if (NWSDISPLAY > 0)
    848 	struct vt_mode lvt;
    849 	struct kbentry kbe;
    850 #endif
    851 	struct linux_hd_geometry hdg;
    852 	struct linux_hd_big_geometry hdg_big;
    853 	struct biosdisk_info *bip;
    854 	file_t *fp;
    855 	int fd;
    856 	struct disklabel label, *labp;
    857 	struct partinfo partp;
    858 	int (*ioctlf)(struct file *, u_long, void *);
    859 	u_long start, biostotal, realtotal;
    860 	u_char heads, sectors;
    861 	u_int cylinders;
    862 	struct ioctl_pt pt;
    863 
    864 	fd = SCARG(uap, fd);
    865 	SCARG(&bia, fd) = fd;
    866 	SCARG(&bia, data) = SCARG(uap, data);
    867 	com = SCARG(uap, com);
    868 
    869 	if ((fp = fd_getfile(fd)) == NULL)
    870 		return (EBADF);
    871 
    872 	switch (com) {
    873 #if (NWSDISPLAY > 0)
    874 	case LINUX_KDGKBMODE:
    875 		com = KDGKBMODE;
    876 		break;
    877 	case LINUX_KDSKBMODE:
    878 		com = KDSKBMODE;
    879 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    880 			SCARG(&bia, data) = (void *)K_RAW;
    881 		break;
    882 	case LINUX_KIOCSOUND:
    883 		SCARG(&bia, data) =
    884 		    (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    885 		/* fall through */
    886 	case LINUX_KDMKTONE:
    887 		com = KDMKTONE;
    888 		break;
    889 	case LINUX_KDSETMODE:
    890 		com = KDSETMODE;
    891 		break;
    892 	case LINUX_KDGETMODE:
    893 		/* KD_* values are equal to the wscons numbers */
    894 		com = WSDISPLAYIO_GMODE;
    895 		break;
    896 	case LINUX_KDENABIO:
    897 		com = KDENABIO;
    898 		break;
    899 	case LINUX_KDDISABIO:
    900 		com = KDDISABIO;
    901 		break;
    902 	case LINUX_KDGETLED:
    903 		com = KDGETLED;
    904 		break;
    905 	case LINUX_KDSETLED:
    906 		com = KDSETLED;
    907 		break;
    908 	case LINUX_VT_OPENQRY:
    909 		com = VT_OPENQRY;
    910 		break;
    911 	case LINUX_VT_GETMODE:
    912 		error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
    913 		if (error != 0)
    914 			goto out;
    915 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    916 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    917 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    918 		error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
    919 		goto out;
    920 	case LINUX_VT_SETMODE:
    921 		error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
    922 		if (error != 0)
    923 			goto out;
    924 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    925 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    926 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    927 		error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
    928 		goto out;
    929 	case LINUX_VT_DISALLOCATE:
    930 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    931 		error = 0;
    932 		goto out;
    933 	case LINUX_VT_RELDISP:
    934 		com = VT_RELDISP;
    935 		break;
    936 	case LINUX_VT_ACTIVATE:
    937 		com = VT_ACTIVATE;
    938 		break;
    939 	case LINUX_VT_WAITACTIVE:
    940 		com = VT_WAITACTIVE;
    941 		break;
    942 	case LINUX_VT_GETSTATE:
    943 		com = VT_GETSTATE;
    944 		break;
    945 	case LINUX_KDGKBTYPE:
    946 	    {
    947 		static const u_int8_t kb101 = KB_101;
    948 
    949 		/* This is what Linux does. */
    950 		error = copyout(&kb101, SCARG(uap, data), 1);
    951 		goto out;
    952 	    }
    953 	case LINUX_KDGKBENT:
    954 		/*
    955 		 * The Linux KDGKBENT ioctl is different from the
    956 		 * SYSV original. So we handle it in machdep code.
    957 		 * XXX We should use keyboard mapping information
    958 		 * from wsdisplay, but this would be expensive.
    959 		 */
    960 		if ((error = copyin(SCARG(uap, data), &kbe,
    961 				    sizeof(struct kbentry))))
    962 			goto out;
    963 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    964 		    || kbe.kb_index >= NR_KEYS) {
    965 			error = EINVAL;
    966 			goto out;
    967 		}
    968 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    969 		error = copyout(&kbe, SCARG(uap, data),
    970 				sizeof(struct kbentry));
    971 		goto out;
    972 #endif
    973 	case LINUX_HDIO_GETGEO:
    974 	case LINUX_HDIO_GETGEO_BIG:
    975 		/*
    976 		 * Try to mimic Linux behaviour: return the BIOS geometry
    977 		 * if possible (extending its # of cylinders if it's beyond
    978 		 * the 1023 limit), fall back to the MI geometry (i.e.
    979 		 * the real geometry) if not found, by returning an
    980 		 * error. See common/linux_hdio.c
    981 		 */
    982 		bip = fd2biosinfo(curproc, fp);
    983 		ioctlf = fp->f_ops->fo_ioctl;
    984 		error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label);
    985 		error1 = ioctlf(fp, DIOCGPART, (void *)&partp);
    986 		if (error != 0 && error1 != 0) {
    987 			error = error1;
    988 			goto out;
    989 		}
    990 		labp = error != 0 ? &label : partp.disklab;
    991 		start = error1 != 0 ? partp.part->p_offset : 0;
    992 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    993 		    && bip->bi_cyl != 0) {
    994 			heads = bip->bi_head;
    995 			sectors = bip->bi_sec;
    996 			cylinders = bip->bi_cyl;
    997 			biostotal = heads * sectors * cylinders;
    998 			realtotal = labp->d_ntracks * labp->d_nsectors *
    999 			    labp->d_ncylinders;
   1000 			if (realtotal > biostotal)
   1001 				cylinders = realtotal / (heads * sectors);
   1002 		} else {
   1003 			heads = labp->d_ntracks;
   1004 			cylinders = labp->d_ncylinders;
   1005 			sectors = labp->d_nsectors;
   1006 		}
   1007 		if (com == LINUX_HDIO_GETGEO) {
   1008 			hdg.start = start;
   1009 			hdg.heads = heads;
   1010 			hdg.cylinders = cylinders;
   1011 			hdg.sectors = sectors;
   1012 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1013 			goto out;
   1014 		} else {
   1015 			hdg_big.start = start;
   1016 			hdg_big.heads = heads;
   1017 			hdg_big.cylinders = cylinders;
   1018 			hdg_big.sectors = sectors;
   1019 			error = copyout(&hdg_big, SCARG(uap, data),
   1020 			    sizeof hdg_big);
   1021 			goto out;
   1022 		}
   1023 
   1024 	default:
   1025 		/*
   1026 		 * Unknown to us. If it's on a device, just pass it through
   1027 		 * using PTIOCLINUX, the device itself might be able to
   1028 		 * make some sense of it.
   1029 		 * XXX hack: if the function returns EJUSTRETURN,
   1030 		 * it has stuffed a sysctl return value in pt.data.
   1031 		 */
   1032 		ioctlf = fp->f_ops->fo_ioctl;
   1033 		pt.com = SCARG(uap, com);
   1034 		pt.data = SCARG(uap, data);
   1035 		error = ioctlf(fp, PTIOCLINUX, &pt);
   1036 		if (error == EJUSTRETURN) {
   1037 			retval[0] = (register_t)pt.data;
   1038 			error = 0;
   1039 		}
   1040 
   1041 		if (error == ENOTTY) {
   1042 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1043 			    com));
   1044 		}
   1045 		goto out;
   1046 	}
   1047 	SCARG(&bia, com) = com;
   1048 	error = sys_ioctl(curlwp, &bia, retval);
   1049 out:
   1050 	fd_putfile(fd);
   1051 	return error;
   1052 }
   1053 
   1054 /*
   1055  * Set I/O permissions for a process. Just set the maximum level
   1056  * right away (ignoring the argument), otherwise we would have
   1057  * to rely on I/O permission maps, which are not implemented.
   1058  */
   1059 int
   1060 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
   1061 {
   1062 	/* {
   1063 		syscallarg(int) level;
   1064 	} */
   1065 	struct trapframe *fp = l->l_md.md_regs;
   1066 
   1067 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
   1068 	    NULL, NULL, NULL, NULL) != 0)
   1069 		return EPERM;
   1070 	fp->tf_eflags |= PSL_IOPL;
   1071 	*retval = 0;
   1072 	return 0;
   1073 }
   1074 
   1075 /*
   1076  * See above. If a root process tries to set access to an I/O port,
   1077  * just let it have the whole range.
   1078  */
   1079 int
   1080 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
   1081 {
   1082 	/* {
   1083 		syscallarg(unsigned int) lo;
   1084 		syscallarg(unsigned int) hi;
   1085 		syscallarg(int) val;
   1086 	} */
   1087 	struct trapframe *fp = l->l_md.md_regs;
   1088 
   1089 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
   1090 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
   1091 	    NULL, NULL) != 0)
   1092 		return EPERM;
   1093 	if (SCARG(uap, val))
   1094 		fp->tf_eflags |= PSL_IOPL;
   1095 	*retval = 0;
   1096 	return 0;
   1097 }
   1098 
   1099 int
   1100 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
   1101     void *arg)
   1102 {
   1103 	return 0;
   1104 }
   1105 
   1106 const char *
   1107 linux_get_uname_arch(void)
   1108 {
   1109 	static char uname_arch[5] = "i386";
   1110 
   1111 	if (uname_arch[1] == '3')
   1112 		uname_arch[1] += cpu_class;
   1113 	return uname_arch;
   1114 }
   1115 
   1116 #ifdef LINUX_NPTL
   1117 void *
   1118 linux_get_newtls(struct lwp *l)
   1119 {
   1120 #if 0
   1121 	struct trapframe *tf = l->l_md.md_regs;
   1122 #endif
   1123 
   1124 	/* XXX: Implement me */
   1125 	return NULL;
   1126 }
   1127 
/*
 * Install a new TLS pointer for an lwp.
 * XXX: Implement me -- currently a stub that ignores both arguments and
 * always returns 0.
 */
int
linux_set_newtls(struct lwp *l, void *tls)
{

	return 0;
}
   1134 #endif
   1135