Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.136.2.1
      1 /*	$NetBSD: linux_machdep.c,v 1.136.2.1 2008/05/10 23:48:52 wrstuden Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000, 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.136.2.1 2008/05/10 23:48:52 wrstuden Exp $");
     34 
     35 #if defined(_KERNEL_OPT)
     36 #include "opt_vm86.h"
     37 #include "opt_user_ldt.h"
     38 #endif
     39 
     40 #include <sys/param.h>
     41 #include <sys/systm.h>
     42 #include <sys/signalvar.h>
     43 #include <sys/kernel.h>
     44 #include <sys/proc.h>
     45 #include <sys/user.h>
     46 #include <sys/buf.h>
     47 #include <sys/reboot.h>
     48 #include <sys/conf.h>
     49 #include <sys/exec.h>
     50 #include <sys/file.h>
     51 #include <sys/callout.h>
     52 #include <sys/malloc.h>
     53 #include <sys/mbuf.h>
     54 #include <sys/msgbuf.h>
     55 #include <sys/mount.h>
     56 #include <sys/vnode.h>
     57 #include <sys/device.h>
     58 #include <sys/sa.h>
     59 #include <sys/syscallargs.h>
     60 #include <sys/filedesc.h>
     61 #include <sys/exec_elf.h>
     62 #include <sys/disklabel.h>
     63 #include <sys/ioctl.h>
     64 #include <sys/wait.h>
     65 #include <sys/kauth.h>
     66 
     67 #include <miscfs/specfs/specdev.h>
     68 
     69 #include <compat/linux/common/linux_types.h>
     70 #include <compat/linux/common/linux_signal.h>
     71 #include <compat/linux/common/linux_util.h>
     72 #include <compat/linux/common/linux_ioctl.h>
     73 #include <compat/linux/common/linux_hdio.h>
     74 #include <compat/linux/common/linux_exec.h>
     75 #include <compat/linux/common/linux_machdep.h>
     76 #include <compat/linux/common/linux_errno.h>
     77 
     78 #include <compat/linux/linux_syscallargs.h>
     79 
     80 #include <sys/cpu.h>
     81 #include <machine/cpufunc.h>
     82 #include <machine/psl.h>
     83 #include <machine/reg.h>
     84 #include <machine/segments.h>
     85 #include <machine/specialreg.h>
     86 #include <machine/sysarch.h>
     87 #include <machine/vm86.h>
     88 #include <machine/vmparam.h>
     89 
     90 /*
     91  * To see whether wscons is configured (for virtual console ioctl calls).
     92  */
     93 #if defined(_KERNEL_OPT)
     94 #include "wsdisplay.h"
     95 #endif
     96 #if (NWSDISPLAY > 0)
     97 #include <dev/wscons/wsconsio.h>
     98 #include <dev/wscons/wsdisplay_usl_io.h>
     99 #if defined(_KERNEL_OPT)
    100 #include "opt_xserver.h"
    101 #endif
    102 #endif
    103 
    104 #ifdef DEBUG_LINUX
    105 #define DPRINTF(a) uprintf a
    106 #else
    107 #define DPRINTF(a)
    108 #endif
    109 
    110 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
    111 extern struct disklist *x86_alldisks;
    112 static void linux_save_ucontext(struct lwp *, struct trapframe *,
    113     const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
    114 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
    115     const sigset_t *, struct linux_sigcontext *);
    116 static int linux_restore_sigcontext(struct lwp *,
    117     struct linux_sigcontext *, register_t *);
    118 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
    119 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
    120 
    121 extern char linux_sigcode[], linux_rt_sigcode[];
    122 /*
    123  * Deal with some i386-specific things in the Linux emulation code.
    124  */
    125 
    126 void
    127 linux_setregs(struct lwp *l, struct exec_package *epp, u_long stack)
    128 {
    129 	struct pcb *pcb = &l->l_addr->u_pcb;
    130 	struct trapframe *tf;
    131 
    132 #if NNPX > 0
    133 	/* If we were using the FPU, forget about it. */
    134 	if (npxproc == l)
    135 		npxdrop();
    136 #endif
    137 
    138 #ifdef USER_LDT
    139 	pmap_ldt_cleanup(l);
    140 #endif
    141 
    142 	l->l_md.md_flags &= ~MDL_USEDFPU;
    143 
    144 	if (i386_use_fxsave) {
    145 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    146 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    147 	} else
    148 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    149 
    150 	tf = l->l_md.md_regs;
    151 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    152 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    153 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    154 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    155 	tf->tf_edi = 0;
    156 	tf->tf_esi = 0;
    157 	tf->tf_ebp = 0;
    158 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    159 	tf->tf_edx = 0;
    160 	tf->tf_ecx = 0;
    161 	tf->tf_eax = 0;
    162 	tf->tf_eip = epp->ep_entry;
    163 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    164 	tf->tf_eflags = PSL_USERSET;
    165 	tf->tf_esp = stack;
    166 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    167 }
    168 
    169 /*
    170  * Send an interrupt to process.
    171  *
    172  * Stack is set up to allow sigcode stored
    173  * in u. to call routine, followed by kcall
    174  * to sigreturn routine below.  After sigreturn
    175  * resets the signal mask, the stack, and the
    176  * frame pointer, it returns to the user
    177  * specified pc, psl.
    178  */
    179 
    180 void
    181 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    182 {
    183 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    184 		linux_rt_sendsig(ksi, mask);
    185 	else
    186 		linux_old_sendsig(ksi, mask);
    187 }
    188 
    189 
    190 static void
    191 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
    192 {
    193 	uc->uc_flags = 0;
    194 	uc->uc_link = NULL;
    195 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    196 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    197 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    198 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    199 }
    200 
    201 static void
    202 linux_save_sigcontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct linux_sigcontext *sc)
    203 {
    204 	/* Save register context. */
    205 #ifdef VM86
    206 	if (tf->tf_eflags & PSL_VM) {
    207 		sc->sc_gs = tf->tf_vm86_gs;
    208 		sc->sc_fs = tf->tf_vm86_fs;
    209 		sc->sc_es = tf->tf_vm86_es;
    210 		sc->sc_ds = tf->tf_vm86_ds;
    211 		sc->sc_eflags = get_vflags(l);
    212 	} else
    213 #endif
    214 	{
    215 		sc->sc_gs = tf->tf_gs;
    216 		sc->sc_fs = tf->tf_fs;
    217 		sc->sc_es = tf->tf_es;
    218 		sc->sc_ds = tf->tf_ds;
    219 		sc->sc_eflags = tf->tf_eflags;
    220 	}
    221 	sc->sc_edi = tf->tf_edi;
    222 	sc->sc_esi = tf->tf_esi;
    223 	sc->sc_esp = tf->tf_esp;
    224 	sc->sc_ebp = tf->tf_ebp;
    225 	sc->sc_ebx = tf->tf_ebx;
    226 	sc->sc_edx = tf->tf_edx;
    227 	sc->sc_ecx = tf->tf_ecx;
    228 	sc->sc_eax = tf->tf_eax;
    229 	sc->sc_eip = tf->tf_eip;
    230 	sc->sc_cs = tf->tf_cs;
    231 	sc->sc_esp_at_signal = tf->tf_esp;
    232 	sc->sc_ss = tf->tf_ss;
    233 	sc->sc_err = tf->tf_err;
    234 	sc->sc_trapno = tf->tf_trapno;
    235 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    236 	sc->sc_387 = NULL;
    237 
    238 	/* Save signal stack. */
    239 	/* Linux doesn't save the onstack flag in sigframe */
    240 
    241 	/* Save signal mask. */
    242 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    243 }
    244 
    245 static void
    246 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    247 {
    248 	struct lwp *l = curlwp;
    249 	struct proc *p = l->l_proc;
    250 	struct trapframe *tf;
    251 	struct linux_rt_sigframe *fp, frame;
    252 	int onstack, error;
    253 	linux_siginfo_t *lsi;
    254 	int sig = ksi->ksi_signo;
    255 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    256 	struct sigaltstack *sas = l->l_sigstk;
    257 
    258 	tf = l->l_md.md_regs;
    259 	/* Do we need to jump onto the signal stack? */
    260 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    261 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    262 
    263 
    264 	/* Allocate space for the signal handler context. */
    265 	if (onstack)
    266 		fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
    267 		    sas->ss_size);
    268 	else
    269 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    270 	fp--;
    271 
    272 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    273 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    274 
    275 	/* Build stack frame for signal trampoline. */
    276 	frame.sf_handler = catcher;
    277 	frame.sf_sig = native_to_linux_signo[sig];
    278 	frame.sf_sip = &fp->sf_si;
    279 	frame.sf_ucp = &fp->sf_uc;
    280 
    281 	/*
    282 	 * XXX: the following code assumes that the constants for
    283 	 * siginfo are the same between linux and NetBSD.
    284 	 */
    285 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    286 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    287 	lsi->lsi_code = ksi->ksi_code;
    288 	switch (lsi->lsi_signo = frame.sf_sig) {
    289 	case LINUX_SIGILL:
    290 	case LINUX_SIGFPE:
    291 	case LINUX_SIGSEGV:
    292 	case LINUX_SIGBUS:
    293 	case LINUX_SIGTRAP:
    294 		lsi->lsi_addr = ksi->ksi_addr;
    295 		break;
    296 	case LINUX_SIGCHLD:
    297 		lsi->lsi_uid = ksi->ksi_uid;
    298 		lsi->lsi_pid = ksi->ksi_pid;
    299 		lsi->lsi_utime = ksi->ksi_utime;
    300 		lsi->lsi_stime = ksi->ksi_stime;
    301 
    302 		/* We use the same codes */
    303 		lsi->lsi_code = ksi->ksi_code;
    304 		/* XXX is that right? */
    305 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
    306 		break;
    307 	case LINUX_SIGIO:
    308 		lsi->lsi_band = ksi->ksi_band;
    309 		lsi->lsi_fd = ksi->ksi_fd;
    310 		break;
    311 	default:
    312 		lsi->lsi_uid = ksi->ksi_uid;
    313 		lsi->lsi_pid = ksi->ksi_pid;
    314 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    315 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    316 			lsi->lsi_value.sival_ptr = ksi->ksi_value.sival_ptr;
    317 		break;
    318 	}
    319 
    320 	/* Save register context. */
    321 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    322 	sendsig_reset(l, sig);
    323 
    324 	mutex_exit(p->p_lock);
    325 	error = copyout(&frame, fp, sizeof(frame));
    326 	mutex_enter(p->p_lock);
    327 
    328 	if (error != 0) {
    329 		/*
    330 		 * Process has trashed its stack; give it an illegal
    331 		 * instruction to halt it in its tracks.
    332 		 */
    333 		sigexit(l, SIGILL);
    334 		/* NOTREACHED */
    335 	}
    336 
    337 	/*
    338 	 * Build context to run handler in.
    339 	 */
    340 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    341 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    342 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    343 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    344 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    345 	    (linux_rt_sigcode - linux_sigcode);
    346 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    347 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    348 	tf->tf_esp = (int)fp;
    349 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    350 
    351 	/* Remember that we're now on the signal stack. */
    352 	if (onstack)
    353 		sas->ss_flags |= SS_ONSTACK;
    354 }
    355 
    356 static void
    357 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    358 {
    359 	struct lwp *l = curlwp;
    360 	struct proc *p = l->l_proc;
    361 	struct trapframe *tf;
    362 	struct linux_sigframe *fp, frame;
    363 	int onstack, error;
    364 	int sig = ksi->ksi_signo;
    365 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    366 	struct sigaltstack *sas = l->l_sigstk;
    367 
    368 	tf = l->l_md.md_regs;
    369 
    370 	/* Do we need to jump onto the signal stack? */
    371 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    372 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    373 
    374 	/* Allocate space for the signal handler context. */
    375 	if (onstack)
    376 		fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
    377 		    sas->ss_size);
    378 	else
    379 		fp = (struct linux_sigframe *)tf->tf_esp;
    380 	fp--;
    381 
    382 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    383 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    384 
    385 	/* Build stack frame for signal trampoline. */
    386 	frame.sf_handler = catcher;
    387 	frame.sf_sig = native_to_linux_signo[sig];
    388 
    389 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    390 	sendsig_reset(l, sig);
    391 
    392 	mutex_exit(p->p_lock);
    393 	error = copyout(&frame, fp, sizeof(frame));
    394 	mutex_enter(p->p_lock);
    395 
    396 	if (error != 0) {
    397 		/*
    398 		 * Process has trashed its stack; give it an illegal
    399 		 * instruction to halt it in its tracks.
    400 		 */
    401 		sigexit(l, SIGILL);
    402 		/* NOTREACHED */
    403 	}
    404 
    405 	/*
    406 	 * Build context to run handler in.
    407 	 */
    408 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    409 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    410 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    411 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    412 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    413 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    414 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    415 	tf->tf_esp = (int)fp;
    416 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    417 
    418 	/* Remember that we're now on the signal stack. */
    419 	if (onstack)
    420 		sas->ss_flags |= SS_ONSTACK;
    421 }
    422 
    423 /*
    424  * System call to cleanup state after a signal
    425  * has been taken.  Reset signal mask and
    426  * stack state from context left by sendsig (above).
    427  * Return to previous pc and psl as specified by
    428  * context left by sendsig. Check carefully to
    429  * make sure that the user has not modified the
    430  * psl to gain improper privileges or to cause
    431  * a machine fault.
    432  */
    433 int
    434 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
    435 {
    436 	/* {
    437 		syscallarg(struct linux_ucontext *) ucp;
    438 	} */
    439 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    440 	int error;
    441 
    442 	/*
    443 	 * The trampoline code hands us the context.
    444 	 * It is unsafe to keep track of it ourselves, in the event that a
    445 	 * program jumps out of a signal handler.
    446 	 */
    447 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    448 		return error;
    449 
    450 	/* XXX XAX we can do better here by using more of the ucontext */
    451 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    452 }
    453 
    454 int
    455 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
    456 {
    457 	/* {
    458 		syscallarg(struct linux_sigcontext *) scp;
    459 	} */
    460 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    461 	int error;
    462 
    463 	/*
    464 	 * The trampoline code hands us the context.
    465 	 * It is unsafe to keep track of it ourselves, in the event that a
    466 	 * program jumps out of a signal handler.
    467 	 */
    468 	if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
    469 		return error;
    470 	return linux_restore_sigcontext(l, &context, retval);
    471 }
    472 
    473 static int
    474 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    475     register_t *retval)
    476 {
    477 	struct proc *p = l->l_proc;
    478 	struct sigaltstack *sas = l->l_sigstk;
    479 	struct trapframe *tf;
    480 	sigset_t mask;
    481 	ssize_t ss_gap;
    482 	/* Restore register context. */
    483 	tf = l->l_md.md_regs;
    484 
    485 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    486 #ifdef VM86
    487 	if (scp->sc_eflags & PSL_VM) {
    488 		void syscall_vm86(struct trapframe *);
    489 
    490 		tf->tf_vm86_gs = scp->sc_gs;
    491 		tf->tf_vm86_fs = scp->sc_fs;
    492 		tf->tf_vm86_es = scp->sc_es;
    493 		tf->tf_vm86_ds = scp->sc_ds;
    494 		set_vflags(l, scp->sc_eflags);
    495 		p->p_md.md_syscall = syscall_vm86;
    496 	} else
    497 #endif
    498 	{
    499 		/*
    500 		 * Check for security violations.  If we're returning to
    501 		 * protected mode, the CPU will validate the segment registers
    502 		 * automatically and generate a trap on violations.  We handle
    503 		 * the trap, rather than doing all of the checking here.
    504 		 */
    505 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    506 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    507 			return EINVAL;
    508 
    509 		tf->tf_gs = scp->sc_gs;
    510 		tf->tf_fs = scp->sc_fs;
    511 		tf->tf_es = scp->sc_es;
    512 		tf->tf_ds = scp->sc_ds;
    513 #ifdef VM86
    514 		if (tf->tf_eflags & PSL_VM)
    515 			(*p->p_emul->e_syscall_intern)(p);
    516 #endif
    517 		tf->tf_eflags = scp->sc_eflags;
    518 	}
    519 	tf->tf_edi = scp->sc_edi;
    520 	tf->tf_esi = scp->sc_esi;
    521 	tf->tf_ebp = scp->sc_ebp;
    522 	tf->tf_ebx = scp->sc_ebx;
    523 	tf->tf_edx = scp->sc_edx;
    524 	tf->tf_ecx = scp->sc_ecx;
    525 	tf->tf_eax = scp->sc_eax;
    526 	tf->tf_eip = scp->sc_eip;
    527 	tf->tf_cs = scp->sc_cs;
    528 	tf->tf_esp = scp->sc_esp_at_signal;
    529 	tf->tf_ss = scp->sc_ss;
    530 
    531 	/* Restore signal stack. */
    532 	/*
    533 	 * Linux really does it this way; it doesn't have space in sigframe
    534 	 * to save the onstack flag.
    535 	 */
    536 	mutex_enter(p->p_lock);
    537 	ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
    538 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    539 		sas->ss_flags |= SS_ONSTACK;
    540 	else
    541 		sas->ss_flags &= ~SS_ONSTACK;
    542 
    543 	/* Restore signal mask. */
    544 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    545 	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
    546 	mutex_exit(p->p_lock);
    547 
    548 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    549 	return EJUSTRETURN;
    550 }
    551 
    552 #ifdef USER_LDT
    553 
    554 static int
    555 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
    556     register_t *retval)
    557 {
    558 	struct x86_get_ldt_args gl;
    559 	int error;
    560 	int num_ldt;
    561 	union descriptor *ldt_buf;
    562 
    563 	/*
    564 	 * I've checked the linux code - this function is asymetric with
    565 	 * linux_write_ldt, and returns raw ldt entries.
    566 	 * NB, the code I saw zerod the spare parts of the user buffer.
    567 	 */
    568 
    569 	DPRINTF(("linux_read_ldt!"));
    570 
    571 	num_ldt = x86_get_ldt_len(l);
    572 	if (num_ldt <= 0)
    573 		return EINVAL;
    574 
    575 	gl.start = 0;
    576 	gl.desc = NULL;
    577 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    578 
    579 	if (gl.num > num_ldt)
    580 		gl.num = num_ldt;
    581 
    582 	ldt_buf = malloc(gl.num * sizeof *ldt, M_TEMP, M_WAITOK);
    583 
    584 	error = x86_get_ldt1(l, &gl, ldt_buf);
    585 	/* NB gl.num might have changed */
    586 	if (error == 0) {
    587 		*retval = gl.num * sizeof *ldt;
    588 		error = copyout(ldt_buf, SCARG(uap, ptr),
    589 		    gl.num * sizeof *ldt_buf);
    590 	}
    591 	free(ldt, M_TEMP);
    592 
    593 	return error;
    594 }
    595 
    596 struct linux_ldt_info {
    597 	u_int entry_number;
    598 	u_long base_addr;
    599 	u_int limit;
    600 	u_int seg_32bit:1;
    601 	u_int contents:2;
    602 	u_int read_exec_only:1;
    603 	u_int limit_in_pages:1;
    604 	u_int seg_not_present:1;
    605 	u_int useable:1;
    606 };
    607 
    608 static int
    609 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
    610     int oldmode)
    611 {
    612 	struct linux_ldt_info ldt_info;
    613 	union descriptor d;
    614 	struct x86_set_ldt_args sl;
    615 	int error;
    616 
    617 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    618 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    619 		return (EINVAL);
    620 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    621 		return error;
    622 	if (ldt_info.entry_number >= 8192)
    623 		return (EINVAL);
    624 	if (ldt_info.contents == 3) {
    625 		if (oldmode)
    626 			return (EINVAL);
    627 		if (ldt_info.seg_not_present)
    628 			return (EINVAL);
    629 	}
    630 
    631 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    632 	    (oldmode || (ldt_info.contents == 0 &&
    633 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    634 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    635 	    ldt_info.useable == 0))) {
    636 		/* this means you should zero the ldt */
    637 		(void)memset(&d, 0, sizeof(d));
    638 	} else {
    639 		d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    640 		d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    641 		d.sd.sd_lolimit = ldt_info.limit & 0xffff;
    642 		d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    643 		d.sd.sd_type = 16 | (ldt_info.contents << 2) |
    644 		    (!ldt_info.read_exec_only << 1);
    645 		d.sd.sd_dpl = SEL_UPL;
    646 		d.sd.sd_p = !ldt_info.seg_not_present;
    647 		d.sd.sd_def32 = ldt_info.seg_32bit;
    648 		d.sd.sd_gran = ldt_info.limit_in_pages;
    649 		if (!oldmode)
    650 			d.sd.sd_xx = ldt_info.useable;
    651 		else
    652 			d.sd.sd_xx = 0;
    653 	}
    654 	sl.start = ldt_info.entry_number;
    655 	sl.desc = NULL;;
    656 	sl.num = 1;
    657 
    658 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    659 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    660 
    661 	return x86_set_ldt1(l, &sl, &d);
    662 }
    663 
    664 #endif /* USER_LDT */
    665 
    666 int
    667 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
    668 {
    669 	/* {
    670 		syscallarg(int) func;
    671 		syscallarg(void *) ptr;
    672 		syscallarg(size_t) bytecount;
    673 	} */
    674 
    675 	switch (SCARG(uap, func)) {
    676 #ifdef USER_LDT
    677 	case 0:
    678 		return linux_read_ldt(l, (const void *)uap, retval);
    679 	case 1:
    680 		return linux_write_ldt(l, (const void *)uap, 1);
    681 	case 2:
    682 #ifdef notyet
    683 		return (linux_read_default_ldt(l, (const void *)uap, retval);
    684 #else
    685 		return (ENOSYS);
    686 #endif
    687 	case 0x11:
    688 		return linux_write_ldt(l, (const void *)uap, 0);
    689 #endif /* USER_LDT */
    690 
    691 	default:
    692 		return (ENOSYS);
    693 	}
    694 }
    695 
    696 /*
    697  * XXX Pathetic hack to make svgalib work. This will fake the major
    698  * device number of an opened VT so that svgalib likes it. grmbl.
    699  * Should probably do it 'wrong the right way' and use a mapping
    700  * array for all major device numbers, and map linux_mknod too.
    701  */
    702 dev_t
    703 linux_fakedev(dev_t dev, int raw)
    704 {
    705 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    706 	const struct cdevsw *cd = cdevsw_lookup(dev);
    707 
    708 	if (raw) {
    709 #if (NWSDISPLAY > 0)
    710 		extern const struct cdevsw wsdisplay_cdevsw;
    711 		if (cd == &wsdisplay_cdevsw)
    712 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    713 #endif
    714 	}
    715 
    716 	if (cd == &ptc_cdevsw)
    717 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    718 	if (cd == &pts_cdevsw)
    719 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    720 
    721 	return dev;
    722 }
    723 
    724 #if (NWSDISPLAY > 0)
    725 /*
    726  * That's not complete, but enough to get an X server running.
    727  */
/* Number of entries in each keymap table below. */
#define NR_KEYS 128

/* Keymap used as entry 0 of linux_keytabs. */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Keymap used as entry 1 of linux_keytabs. */
static const u_short shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Keymap used as entries 2 and 3 of linux_keytabs. */
static const u_short altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Keymap used as entry 4 of linux_keytabs. */
static const u_short ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/*
 * Table of keymaps exported to the rest of the emulation.  Note that
 * altgr_map is deliberately listed twice (slots 2 and 3).
 */
const u_short * const linux_keytabs[] = {
	plain_map,
	shift_map,
	altgr_map,
	altgr_map,
	ctrl_map
};
    802 #endif
    803 
    804 static struct biosdisk_info *
    805 fd2biosinfo(struct proc *p, struct file *fp)
    806 {
    807 	struct vnode *vp;
    808 	const char *blkname;
    809 	char diskname[16];
    810 	int i;
    811 	struct nativedisk_info *nip;
    812 	struct disklist *dl = x86_alldisks;
    813 
    814 	if (fp->f_type != DTYPE_VNODE)
    815 		return NULL;
    816 	vp = (struct vnode *)fp->f_data;
    817 
    818 	if (vp->v_type != VBLK)
    819 		return NULL;
    820 
    821 	blkname = devsw_blk2name(major(vp->v_rdev));
    822 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    823 	    DISKUNIT(vp->v_rdev));
    824 
    825 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    826 		nip = &dl->dl_nativedisks[i];
    827 		if (strcmp(diskname, nip->ni_devname))
    828 			continue;
    829 		if (nip->ni_nmatches != 0)
    830 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    831 	}
    832 
    833 	return NULL;
    834 }
    835 
    836 
    837 /*
    838  * We come here in a last attempt to satisfy a Linux ioctl() call
    839  */
    840 int
    841 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
    842 {
    843 	/* {
    844 		syscallarg(int) fd;
    845 		syscallarg(u_long) com;
    846 		syscallarg(void *) data;
    847 	} */
    848 	struct sys_ioctl_args bia;
    849 	u_long com;
    850 	int error, error1;
    851 #if (NWSDISPLAY > 0)
    852 	struct vt_mode lvt;
    853 	struct kbentry kbe;
    854 #endif
    855 	struct linux_hd_geometry hdg;
    856 	struct linux_hd_big_geometry hdg_big;
    857 	struct biosdisk_info *bip;
    858 	file_t *fp;
    859 	int fd;
    860 	struct disklabel label, *labp;
    861 	struct partinfo partp;
    862 	int (*ioctlf)(struct file *, u_long, void *);
    863 	u_long start, biostotal, realtotal;
    864 	u_char heads, sectors;
    865 	u_int cylinders;
    866 	struct ioctl_pt pt;
    867 
    868 	fd = SCARG(uap, fd);
    869 	SCARG(&bia, fd) = fd;
    870 	SCARG(&bia, data) = SCARG(uap, data);
    871 	com = SCARG(uap, com);
    872 
    873 	if ((fp = fd_getfile(fd)) == NULL)
    874 		return (EBADF);
    875 
    876 	switch (com) {
    877 #if (NWSDISPLAY > 0)
    878 	case LINUX_KDGKBMODE:
    879 		com = KDGKBMODE;
    880 		break;
    881 	case LINUX_KDSKBMODE:
    882 		com = KDSKBMODE;
    883 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    884 			SCARG(&bia, data) = (void *)K_RAW;
    885 		break;
    886 	case LINUX_KIOCSOUND:
    887 		SCARG(&bia, data) =
    888 		    (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    889 		/* fall through */
    890 	case LINUX_KDMKTONE:
    891 		com = KDMKTONE;
    892 		break;
    893 	case LINUX_KDSETMODE:
    894 		com = KDSETMODE;
    895 		break;
    896 	case LINUX_KDGETMODE:
    897 		/* KD_* values are equal to the wscons numbers */
    898 		com = WSDISPLAYIO_GMODE;
    899 		break;
    900 	case LINUX_KDENABIO:
    901 		com = KDENABIO;
    902 		break;
    903 	case LINUX_KDDISABIO:
    904 		com = KDDISABIO;
    905 		break;
    906 	case LINUX_KDGETLED:
    907 		com = KDGETLED;
    908 		break;
    909 	case LINUX_KDSETLED:
    910 		com = KDSETLED;
    911 		break;
    912 	case LINUX_VT_OPENQRY:
    913 		com = VT_OPENQRY;
    914 		break;
    915 	case LINUX_VT_GETMODE:
    916 		error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
    917 		if (error != 0)
    918 			goto out;
    919 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    920 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    921 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    922 		error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
    923 		goto out;
    924 	case LINUX_VT_SETMODE:
    925 		error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
    926 		if (error != 0)
    927 			goto out;
    928 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    929 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    930 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    931 		error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
    932 		goto out;
    933 	case LINUX_VT_DISALLOCATE:
    934 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    935 		error = 0;
    936 		goto out;
    937 	case LINUX_VT_RELDISP:
    938 		com = VT_RELDISP;
    939 		break;
    940 	case LINUX_VT_ACTIVATE:
    941 		com = VT_ACTIVATE;
    942 		break;
    943 	case LINUX_VT_WAITACTIVE:
    944 		com = VT_WAITACTIVE;
    945 		break;
    946 	case LINUX_VT_GETSTATE:
    947 		com = VT_GETSTATE;
    948 		break;
    949 	case LINUX_KDGKBTYPE:
    950 	    {
    951 		static const u_int8_t kb101 = KB_101;
    952 
    953 		/* This is what Linux does. */
    954 		error = copyout(&kb101, SCARG(uap, data), 1);
    955 		goto out;
    956 	    }
    957 	case LINUX_KDGKBENT:
    958 		/*
    959 		 * The Linux KDGKBENT ioctl is different from the
    960 		 * SYSV original. So we handle it in machdep code.
    961 		 * XXX We should use keyboard mapping information
    962 		 * from wsdisplay, but this would be expensive.
    963 		 */
    964 		if ((error = copyin(SCARG(uap, data), &kbe,
    965 				    sizeof(struct kbentry))))
    966 			goto out;
    967 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    968 		    || kbe.kb_index >= NR_KEYS) {
    969 			error = EINVAL;
    970 			goto out;
    971 		}
    972 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    973 		error = copyout(&kbe, SCARG(uap, data),
    974 				sizeof(struct kbentry));
    975 		goto out;
    976 #endif
    977 	case LINUX_HDIO_GETGEO:
    978 	case LINUX_HDIO_GETGEO_BIG:
    979 		/*
    980 		 * Try to mimic Linux behaviour: return the BIOS geometry
    981 		 * if possible (extending its # of cylinders if it's beyond
    982 		 * the 1023 limit), fall back to the MI geometry (i.e.
    983 		 * the real geometry) if not found, by returning an
    984 		 * error. See common/linux_hdio.c
    985 		 */
    986 		bip = fd2biosinfo(curproc, fp);
    987 		ioctlf = fp->f_ops->fo_ioctl;
    988 		error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label);
    989 		error1 = ioctlf(fp, DIOCGPART, (void *)&partp);
    990 		if (error != 0 && error1 != 0) {
    991 			error = error1;
    992 			goto out;
    993 		}
    994 		labp = error != 0 ? &label : partp.disklab;
    995 		start = error1 != 0 ? partp.part->p_offset : 0;
    996 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    997 		    && bip->bi_cyl != 0) {
    998 			heads = bip->bi_head;
    999 			sectors = bip->bi_sec;
   1000 			cylinders = bip->bi_cyl;
   1001 			biostotal = heads * sectors * cylinders;
   1002 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1003 			    labp->d_ncylinders;
   1004 			if (realtotal > biostotal)
   1005 				cylinders = realtotal / (heads * sectors);
   1006 		} else {
   1007 			heads = labp->d_ntracks;
   1008 			cylinders = labp->d_ncylinders;
   1009 			sectors = labp->d_nsectors;
   1010 		}
   1011 		if (com == LINUX_HDIO_GETGEO) {
   1012 			hdg.start = start;
   1013 			hdg.heads = heads;
   1014 			hdg.cylinders = cylinders;
   1015 			hdg.sectors = sectors;
   1016 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1017 			goto out;
   1018 		} else {
   1019 			hdg_big.start = start;
   1020 			hdg_big.heads = heads;
   1021 			hdg_big.cylinders = cylinders;
   1022 			hdg_big.sectors = sectors;
   1023 			error = copyout(&hdg_big, SCARG(uap, data),
   1024 			    sizeof hdg_big);
   1025 			goto out;
   1026 		}
   1027 
   1028 	default:
   1029 		/*
   1030 		 * Unknown to us. If it's on a device, just pass it through
   1031 		 * using PTIOCLINUX, the device itself might be able to
   1032 		 * make some sense of it.
   1033 		 * XXX hack: if the function returns EJUSTRETURN,
   1034 		 * it has stuffed a sysctl return value in pt.data.
   1035 		 */
   1036 		ioctlf = fp->f_ops->fo_ioctl;
   1037 		pt.com = SCARG(uap, com);
   1038 		pt.data = SCARG(uap, data);
   1039 		error = ioctlf(fp, PTIOCLINUX, &pt);
   1040 		if (error == EJUSTRETURN) {
   1041 			retval[0] = (register_t)pt.data;
   1042 			error = 0;
   1043 		}
   1044 
   1045 		if (error == ENOTTY) {
   1046 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1047 			    com));
   1048 		}
   1049 		goto out;
   1050 	}
   1051 	SCARG(&bia, com) = com;
   1052 	error = sys_ioctl(curlwp, &bia, retval);
   1053 out:
   1054 	fd_putfile(fd);
   1055 	return error;
   1056 }
   1057 
   1058 /*
   1059  * Set I/O permissions for a process. Just set the maximum level
   1060  * right away (ignoring the argument), otherwise we would have
   1061  * to rely on I/O permission maps, which are not implemented.
   1062  */
   1063 int
   1064 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
   1065 {
   1066 	/* {
   1067 		syscallarg(int) level;
   1068 	} */
   1069 	struct trapframe *fp = l->l_md.md_regs;
   1070 
   1071 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
   1072 	    NULL, NULL, NULL, NULL) != 0)
   1073 		return EPERM;
   1074 	fp->tf_eflags |= PSL_IOPL;
   1075 	*retval = 0;
   1076 	return 0;
   1077 }
   1078 
   1079 /*
   1080  * See above. If a root process tries to set access to an I/O port,
   1081  * just let it have the whole range.
   1082  */
   1083 int
   1084 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
   1085 {
   1086 	/* {
   1087 		syscallarg(unsigned int) lo;
   1088 		syscallarg(unsigned int) hi;
   1089 		syscallarg(int) val;
   1090 	} */
   1091 	struct trapframe *fp = l->l_md.md_regs;
   1092 
   1093 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
   1094 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
   1095 	    NULL, NULL) != 0)
   1096 		return EPERM;
   1097 	if (SCARG(uap, val))
   1098 		fp->tf_eflags |= PSL_IOPL;
   1099 	*retval = 0;
   1100 	return 0;
   1101 }
   1102 
   1103 int
   1104 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
   1105     void *arg)
   1106 {
   1107 	return 0;
   1108 }
   1109 
   1110 const char *
   1111 linux_get_uname_arch(void)
   1112 {
   1113 	static char uname_arch[5] = "i386";
   1114 
   1115 	if (uname_arch[1] == '3')
   1116 		uname_arch[1] += cpu_class;
   1117 	return uname_arch;
   1118 }
   1119 
   1120 #ifdef LINUX_NPTL
/*
 * Return the new TLS pointer for an lwp.
 * Not implemented yet: the lwp is ignored and NULL is always returned.
 */
void *
linux_get_newtls(struct lwp *l)
{
	/* XXX: Implement me */
	return NULL;
}
   1129 
/*
 * Install a new TLS pointer for an lwp.
 * Not implemented yet: both arguments are ignored and 0 is returned.
 */
int
linux_set_newtls(struct lwp *l, void *tls)
{
	(void)l;
	(void)tls;

	/* XXX: Implement me */
	return 0;
}
   1136 #endif
   1137