Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.62.2.11
      1 /*	$NetBSD: linux_machdep.c,v 1.62.2.11 2002/06/24 22:09:27 nathanw Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.62.2.11 2002/06/24 22:09:27 nathanw Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/map.h>
     52 #include <sys/lwp.h>
     53 #include <sys/proc.h>
     54 #include <sys/user.h>
     55 #include <sys/buf.h>
     56 #include <sys/reboot.h>
     57 #include <sys/conf.h>
     58 #include <sys/exec.h>
     59 #include <sys/file.h>
     60 #include <sys/callout.h>
     61 #include <sys/malloc.h>
     62 #include <sys/mbuf.h>
     63 #include <sys/msgbuf.h>
     64 #include <sys/mount.h>
     65 #include <sys/vnode.h>
     66 #include <sys/device.h>
     67 #include <sys/sa.h>
     68 #include <sys/syscallargs.h>
     69 #include <sys/filedesc.h>
     70 #include <sys/exec_elf.h>
     71 #include <sys/disklabel.h>
     72 #include <sys/ioctl.h>
     73 #include <miscfs/specfs/specdev.h>
     74 
     75 #include <compat/linux/common/linux_types.h>
     76 #include <compat/linux/common/linux_signal.h>
     77 #include <compat/linux/common/linux_util.h>
     78 #include <compat/linux/common/linux_ioctl.h>
     79 #include <compat/linux/common/linux_hdio.h>
     80 #include <compat/linux/common/linux_exec.h>
     81 #include <compat/linux/common/linux_machdep.h>
     82 
     83 #include <compat/linux/linux_syscallargs.h>
     84 
     85 #include <machine/cpu.h>
     86 #include <machine/cpufunc.h>
     87 #include <machine/psl.h>
     88 #include <machine/reg.h>
     89 #include <machine/segments.h>
     90 #include <machine/specialreg.h>
     91 #include <machine/sysarch.h>
     92 #include <machine/vm86.h>
     93 #include <machine/vmparam.h>
     94 
     95 /*
     96  * To see whether wscons is configured (for virtual console ioctl calls).
     97  */
     98 #if defined(_KERNEL_OPT)
     99 #include "wsdisplay.h"
    100 #endif
    101 #if (NWSDISPLAY > 0)
    102 #include <dev/wscons/wsconsio.h>
    103 #include <dev/wscons/wsdisplay_usl_io.h>
    104 #if defined(_KERNEL_OPT)
    105 #include "opt_xserver.h"
    106 #endif
    107 #endif
    108 
    109 #ifdef USER_LDT
    110 #include <machine/cpu.h>
    111 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    112     register_t *));
    113 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    114     register_t *));
    115 #endif
    116 
    117 #ifdef DEBUG_LINUX
    118 #define DPRINTF(a) uprintf a
    119 #else
    120 #define DPRINTF(a)
    121 #endif
    122 
    123 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    124 extern struct disklist *i386_alldisks;
    125 extern const char *findblkname __P((int));
    126 
    127 /*
    128  * Deal with some i386-specific things in the Linux emulation code.
    129  */
    130 
    131 void
    132 linux_setregs(l, epp, stack)
    133 	struct lwp *l;
    134 	struct exec_package *epp;
    135 	u_long stack;
    136 {
    137 	struct pcb *pcb = &l->l_addr->u_pcb;
    138 	struct trapframe *tf;
    139 
    140 #if NNPX > 0
    141 	/* If we were using the FPU, forget about it. */
    142 	if (npxproc == l)
    143 		npxdrop();
    144 #endif
    145 
    146 #ifdef USER_LDT
    147 	pmap_ldt_cleanup(l);
    148 #endif
    149 
    150 	l->l_md.md_flags &= ~MDP_USEDFPU;
    151 	pcb->pcb_flags = 0;
    152 
    153 	if (i386_use_fxsave) {
    154 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    155 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    156 	} else
    157 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    158 
    159 	tf = l->l_md.md_regs;
    160 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    161 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    162 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    163 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    164 	tf->tf_edi = 0;
    165 	tf->tf_esi = 0;
    166 	tf->tf_ebp = 0;
    167 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    168 	tf->tf_edx = 0;
    169 	tf->tf_ecx = 0;
    170 	tf->tf_eax = 0;
    171 	tf->tf_eip = epp->ep_entry;
    172 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    173 	tf->tf_eflags = PSL_USERSET;
    174 	tf->tf_esp = stack;
    175 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    176 }
    177 
    178 /*
    179  * Send an interrupt to process.
    180  *
    181  * Stack is set up to allow sigcode stored
    182  * in u. to call routine, followed by kcall
    183  * to sigreturn routine below.  After sigreturn
    184  * resets the signal mask, the stack, and the
    185  * frame pointer, it returns to the user
    186  * specified pc, psl.
    187  */
    188 
    189 void
    190 linux_sendsig(catcher, sig, mask, code)
    191 	sig_t catcher;
    192 	int sig;
    193 	sigset_t *mask;
    194 	u_long code;
    195 {
    196 	struct lwp *l = curlwp;
    197 	struct proc *p = l->l_proc;
    198 	struct trapframe *tf;
    199 	struct linux_sigframe *fp, frame;
    200 	int onstack;
    201 
    202 	tf = l->l_md.md_regs;
    203 	/* Do we need to jump onto the signal stack? */
    204 	onstack =
    205 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    206 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    207 
    208 	/* Allocate space for the signal handler context. */
    209 	if (onstack)
    210 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
    211 					  p->p_sigctx.ps_sigstk.ss_size);
    212 	else
    213 		fp = (struct linux_sigframe *)tf->tf_esp;
    214 	fp--;
    215 
    216 	/* Build stack frame for signal trampoline. */
    217 	frame.sf_handler = catcher;
    218 	frame.sf_sig = native_to_linux_signo[sig];
    219 
    220 	/* Save register context. */
    221 #ifdef VM86
    222 	if (tf->tf_eflags & PSL_VM) {
    223 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
    224 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
    225 		frame.sf_sc.sc_es = tf->tf_vm86_es;
    226 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
    227 		frame.sf_sc.sc_eflags = get_vflags(l);
    228 	} else
    229 #endif
    230 	{
    231 		frame.sf_sc.sc_gs = tf->tf_gs;
    232 		frame.sf_sc.sc_fs = tf->tf_fs;
    233 		frame.sf_sc.sc_es = tf->tf_es;
    234 		frame.sf_sc.sc_ds = tf->tf_ds;
    235 		frame.sf_sc.sc_eflags = tf->tf_eflags;
    236 	}
    237 	frame.sf_sc.sc_edi = tf->tf_edi;
    238 	frame.sf_sc.sc_esi = tf->tf_esi;
    239 	frame.sf_sc.sc_ebp = tf->tf_ebp;
    240 	frame.sf_sc.sc_ebx = tf->tf_ebx;
    241 	frame.sf_sc.sc_edx = tf->tf_edx;
    242 	frame.sf_sc.sc_ecx = tf->tf_ecx;
    243 	frame.sf_sc.sc_eax = tf->tf_eax;
    244 	frame.sf_sc.sc_eip = tf->tf_eip;
    245 	frame.sf_sc.sc_cs = tf->tf_cs;
    246 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
    247 	frame.sf_sc.sc_ss = tf->tf_ss;
    248 	frame.sf_sc.sc_err = tf->tf_err;
    249 	frame.sf_sc.sc_trapno = tf->tf_trapno;
    250 	frame.sf_sc.sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    251 
    252 	/* Save signal stack. */
    253 	/* Linux doesn't save the onstack flag in sigframe */
    254 
    255 	/* Save signal mask. */
    256 	native_to_linux_old_sigset(&frame.sf_sc.sc_mask, mask);
    257 
    258 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    259 		/*
    260 		 * Process has trashed its stack; give it an illegal
    261 		 * instruction to halt it in its tracks.
    262 		 */
    263 		sigexit(l, SIGILL);
    264 		/* NOTREACHED */
    265 	}
    266 
    267 	/*
    268 	 * Build context to run handler in.
    269 	 */
    270 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    271 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    272 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    273 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    274 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    275 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    276 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    277 	tf->tf_esp = (int)fp;
    278 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    279 
    280 	/* Remember that we're now on the signal stack. */
    281 	if (onstack)
    282 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    283 }
    284 
    285 /*
    286  * System call to cleanup state after a signal
    287  * has been taken.  Reset signal mask and
    288  * stack state from context left by sendsig (above).
    289  * Return to previous pc and psl as specified by
    290  * context left by sendsig. Check carefully to
    291  * make sure that the user has not modified the
    292  * psl to gain improper privileges or to cause
    293  * a machine fault.
    294  */
    295 int
    296 linux_sys_rt_sigreturn(l, v, retval)
    297 	struct lwp *l;
    298 	void *v;
    299 	register_t *retval;
    300 {
    301 	/* XXX XAX write me */
    302 	return(ENOSYS);
    303 }
    304 
    305 int
    306 linux_sys_sigreturn(l, v, retval)
    307 	struct lwp *l;
    308 	void *v;
    309 	register_t *retval;
    310 {
    311 	struct linux_sys_sigreturn_args /* {
    312 		syscallarg(struct linux_sigcontext *) scp;
    313 	} */ *uap = v;
    314 	struct proc *p = l->l_proc;
    315 	struct linux_sigcontext *scp, context;
    316 	struct trapframe *tf;
    317 	sigset_t mask;
    318 	ssize_t ss_gap;
    319 
    320 	/*
    321 	 * The trampoline code hands us the context.
    322 	 * It is unsafe to keep track of it ourselves, in the event that a
    323 	 * program jumps out of a signal handler.
    324 	 */
    325 	scp = SCARG(uap, scp);
    326 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
    327 		return (EFAULT);
    328 
    329 	/* Restore register context. */
    330 	tf = l->l_md.md_regs;
    331 #ifdef VM86
    332 	if (context.sc_eflags & PSL_VM) {
    333 		tf->tf_vm86_gs = context.sc_gs;
    334 		tf->tf_vm86_fs = context.sc_fs;
    335 		tf->tf_vm86_es = context.sc_es;
    336 		tf->tf_vm86_ds = context.sc_ds;
    337 		set_vflags(l, context.sc_eflags);
    338 	} else
    339 #endif
    340 	{
    341 		/*
    342 		 * Check for security violations.  If we're returning to
    343 		 * protected mode, the CPU will validate the segment registers
    344 		 * automatically and generate a trap on violations.  We handle
    345 		 * the trap, rather than doing all of the checking here.
    346 		 */
    347 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    348 		    !USERMODE(context.sc_cs, context.sc_eflags))
    349 			return (EINVAL);
    350 
    351 		tf->tf_gs = context.sc_gs;
    352 		tf->tf_fs = context.sc_fs;
    353 		tf->tf_es = context.sc_es;
    354 		tf->tf_ds = context.sc_ds;
    355 		tf->tf_eflags = context.sc_eflags;
    356 	}
    357 	tf->tf_edi = context.sc_edi;
    358 	tf->tf_esi = context.sc_esi;
    359 	tf->tf_ebp = context.sc_ebp;
    360 	tf->tf_ebx = context.sc_ebx;
    361 	tf->tf_edx = context.sc_edx;
    362 	tf->tf_ecx = context.sc_ecx;
    363 	tf->tf_eax = context.sc_eax;
    364 	tf->tf_eip = context.sc_eip;
    365 	tf->tf_cs = context.sc_cs;
    366 	tf->tf_esp = context.sc_esp_at_signal;
    367 	tf->tf_ss = context.sc_ss;
    368 
    369 	/* Restore signal stack. */
    370 	/*
    371 	 * Linux really does it this way; it doesn't have space in sigframe
    372 	 * to save the onstack flag.
    373 	 */
    374 	ss_gap = (ssize_t)
    375 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
    376 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
    377 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    378 	else
    379 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
    380 
    381 	/* Restore signal mask. */
    382 	linux_old_to_native_sigset(&mask, &context.sc_mask);
    383 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    384 
    385 	return (EJUSTRETURN);
    386 }
    387 
    388 #ifdef USER_LDT
    389 
    390 int
    391 linux_read_ldt(l, uap, retval)
    392 	struct lwp *l;
    393 	struct linux_sys_modify_ldt_args /* {
    394 		syscallarg(int) func;
    395 		syscallarg(void *) ptr;
    396 		syscallarg(size_t) bytecount;
    397 	} */ *uap;
    398 	register_t *retval;
    399 {
    400 	struct proc *p = l->l_proc;
    401 	struct i386_get_ldt_args gl;
    402 	int error;
    403 	caddr_t sg;
    404 	char *parms;
    405 
    406 	DPRINTF(("linux_read_ldt!"));
    407 	sg = stackgap_init(p, 0);
    408 
    409 	gl.start = 0;
    410 	gl.desc = SCARG(uap, ptr);
    411 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    412 
    413 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    414 
    415 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    416 		return (error);
    417 
    418 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    419 		return (error);
    420 
    421 	*retval *= sizeof(union descriptor);
    422 	return (0);
    423 }
    424 
    425 struct linux_ldt_info {
    426 	u_int entry_number;
    427 	u_long base_addr;
    428 	u_int limit;
    429 	u_int seg_32bit:1;
    430 	u_int contents:2;
    431 	u_int read_exec_only:1;
    432 	u_int limit_in_pages:1;
    433 	u_int seg_not_present:1;
    434 	u_int useable:1;
    435 };
    436 
    437 int
    438 linux_write_ldt(l, uap, retval)
    439 	struct lwp *l;
    440 	struct linux_sys_modify_ldt_args /* {
    441 		syscallarg(int) func;
    442 		syscallarg(void *) ptr;
    443 		syscallarg(size_t) bytecount;
    444 	} */ *uap;
    445 	register_t *retval;
    446 {
    447 	struct proc *p = l->l_proc;
    448 	struct linux_ldt_info ldt_info;
    449 	struct segment_descriptor sd;
    450 	struct i386_set_ldt_args sl;
    451 	int error;
    452 	caddr_t sg;
    453 	char *parms;
    454 	int oldmode = (int)retval[0];
    455 
    456 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    457 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    458 		return (EINVAL);
    459 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    460 		return error;
    461 	if (ldt_info.entry_number >= 8192)
    462 		return (EINVAL);
    463 	if (ldt_info.contents == 3) {
    464 		if (oldmode)
    465 			return (EINVAL);
    466 		if (ldt_info.seg_not_present)
    467 			return (EINVAL);
    468 	}
    469 
    470 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    471 	    (oldmode || (ldt_info.contents == 0 &&
    472 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    473 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    474 	    ldt_info.useable == 0))) {
    475 		/* this means you should zero the ldt */
    476 		(void)memset(&sd, 0, sizeof(sd));
    477 	} else {
    478 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    479 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    480 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    481 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    482 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    483 		    (!ldt_info.read_exec_only << 1);
    484 		sd.sd_dpl = SEL_UPL;
    485 		sd.sd_p = !ldt_info.seg_not_present;
    486 		sd.sd_def32 = ldt_info.seg_32bit;
    487 		sd.sd_gran = ldt_info.limit_in_pages;
    488 		if (!oldmode)
    489 			sd.sd_xx = ldt_info.useable;
    490 		else
    491 			sd.sd_xx = 0;
    492 	}
    493 	sg = stackgap_init(p, 0);
    494 	sl.start = ldt_info.entry_number;
    495 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    496 	sl.num = 1;
    497 
    498 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    499 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    500 
    501 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    502 
    503 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    504 		return (error);
    505 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    506 		return (error);
    507 
    508 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    509 		return (error);
    510 
    511 	*retval = 0;
    512 	return (0);
    513 }
    514 
    515 #endif /* USER_LDT */
    516 
    517 int
    518 linux_sys_modify_ldt(l, v, retval)
    519 	struct lwp *l;
    520 	void *v;
    521 	register_t *retval;
    522 {
    523 	struct linux_sys_modify_ldt_args /* {
    524 		syscallarg(int) func;
    525 		syscallarg(void *) ptr;
    526 		syscallarg(size_t) bytecount;
    527 	} */ *uap = v;
    528 
    529 	switch (SCARG(uap, func)) {
    530 #ifdef USER_LDT
    531 	case 0:
    532 		return linux_read_ldt(l, uap, retval);
    533 	case 1:
    534 		retval[0] = 1;
    535 		return linux_write_ldt(l, uap, retval);
    536 	case 2:
    537 #ifdef notyet
    538 		return (linux_read_default_ldt(l, uap, retval);
    539 #else
    540 		return (ENOSYS);
    541 #endif
    542 	case 0x11:
    543 		retval[0] = 0;
    544 		return linux_write_ldt(l, uap, retval);
    545 #endif /* USER_LDT */
    546 
    547 	default:
    548 		return (ENOSYS);
    549 	}
    550 }
    551 
    552 /*
    553  * XXX Pathetic hack to make svgalib work. This will fake the major
    554  * device number of an opened VT so that svgalib likes it. grmbl.
    555  * Should probably do it 'wrong the right way' and use a mapping
    556  * array for all major device numbers, and map linux_mknod too.
    557  */
    558 dev_t
    559 linux_fakedev(dev, raw)
    560 	dev_t dev;
    561 	int raw;
    562 {
    563 	if (raw) {
    564 #if (NWSDISPLAY > 0)
    565 		if (major(dev) == NETBSD_WSCONS_MAJOR)
    566 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    567 #endif
    568 	}
    569 
    570 	return dev;
    571 }
    572 
    573 #if (NWSDISPLAY > 0)
    574 /*
    575  * That's not complete, but enough to get an X server running.
    576  */
    577 #define NR_KEYS 128
    578 static const u_short plain_map[NR_KEYS] = {
    579 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
    580 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
    581 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    582 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
    583 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
    584 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
    585 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
    586 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    587 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
    588 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    589 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
    590 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    591 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    592 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    593 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    594 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    595 }, shift_map[NR_KEYS] = {
    596 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
    597 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
    598 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
    599 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
    600 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
    601 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
    602 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
    603 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
    604 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
    605 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    606 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
    607 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    608 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    609 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
    610 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    611 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    612 }, altgr_map[NR_KEYS] = {
    613 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
    614 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
    615 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    616 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
    617 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
    618 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
    619 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
    620 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
    621 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
    622 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
    623 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
    624 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    625 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    626 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    627 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    628 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    629 }, ctrl_map[NR_KEYS] = {
    630 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
    631 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
    632 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
    633 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
    634 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
    635 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
    636 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
    637 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    638 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
    639 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    640 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
    641 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    642 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    643 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    644 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    645 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    646 };
    647 
    648 const u_short * const linux_keytabs[] = {
    649 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
    650 };
    651 #endif
    652 
    653 static struct biosdisk_info *
    654 fd2biosinfo(p, fp)
    655 	struct proc *p;
    656 	struct file *fp;
    657 {
    658 	struct vnode *vp;
    659 	const char *blkname;
    660 	char diskname[16];
    661 	int i;
    662 	struct nativedisk_info *nip;
    663 	struct disklist *dl = i386_alldisks;
    664 
    665 	if (fp->f_type != DTYPE_VNODE)
    666 		return NULL;
    667 	vp = (struct vnode *)fp->f_data;
    668 
    669 	if (vp->v_type != VBLK)
    670 		return NULL;
    671 
    672 	blkname = findblkname(major(vp->v_rdev));
    673 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    674 	    DISKUNIT(vp->v_rdev));
    675 
    676 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    677 		nip = &dl->dl_nativedisks[i];
    678 		if (strcmp(diskname, nip->ni_devname))
    679 			continue;
    680 		if (nip->ni_nmatches != 0)
    681 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    682 	}
    683 
    684 	return NULL;
    685 }
    686 
    687 
    688 /*
    689  * We come here in a last attempt to satisfy a Linux ioctl() call
    690  */
    691 int
    692 linux_machdepioctl(p, v, retval)
    693 	struct proc *p;
    694 	void *v;
    695 	register_t *retval;
    696 {
    697 	struct linux_sys_ioctl_args /* {
    698 		syscallarg(int) fd;
    699 		syscallarg(u_long) com;
    700 		syscallarg(caddr_t) data;
    701 	} */ *uap = v;
    702 	struct sys_ioctl_args bia;
    703 	u_long com;
    704 	int error, error1;
    705 #if (NWSDISPLAY > 0)
    706 	struct vt_mode lvt;
    707 	caddr_t bvtp, sg;
    708 	struct kbentry kbe;
    709 #endif
    710 	struct linux_hd_geometry hdg;
    711 	struct linux_hd_big_geometry hdg_big;
    712 	struct biosdisk_info *bip;
    713 	struct filedesc *fdp;
    714 	struct file *fp;
    715 	int fd;
    716 	struct disklabel label, *labp;
    717 	struct partinfo partp;
    718 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
    719 	u_long start, biostotal, realtotal;
    720 	u_char heads, sectors;
    721 	u_int cylinders;
    722 	struct ioctl_pt pt;
    723 
    724 	fd = SCARG(uap, fd);
    725 	SCARG(&bia, fd) = fd;
    726 	SCARG(&bia, data) = SCARG(uap, data);
    727 	com = SCARG(uap, com);
    728 
    729 	fdp = p->p_fd;
    730 
    731 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    732 		return (EBADF);
    733 
    734 	switch (com) {
    735 #if (NWSDISPLAY > 0)
    736 	case LINUX_KDGKBMODE:
    737 		com = KDGKBMODE;
    738 		break;
    739 	case LINUX_KDSKBMODE:
    740 		com = KDSKBMODE;
    741 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    742 			SCARG(&bia, data) = (caddr_t)K_RAW;
    743 		break;
    744 	case LINUX_KIOCSOUND:
    745 		SCARG(&bia, data) =
    746 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    747 		/* fall through */
    748 	case LINUX_KDMKTONE:
    749 		com = KDMKTONE;
    750 		break;
    751 	case LINUX_KDSETMODE:
    752 		com = KDSETMODE;
    753 		break;
    754 	case LINUX_KDGETMODE:
    755 		/* KD_* values are equal to the wscons numbers */
    756 		com = WSDISPLAYIO_GMODE;
    757 		break;
    758 	case LINUX_KDENABIO:
    759 		com = KDENABIO;
    760 		break;
    761 	case LINUX_KDDISABIO:
    762 		com = KDDISABIO;
    763 		break;
    764 	case LINUX_KDGETLED:
    765 		com = KDGETLED;
    766 		break;
    767 	case LINUX_KDSETLED:
    768 		com = KDSETLED;
    769 		break;
    770 	case LINUX_VT_OPENQRY:
    771 		com = VT_OPENQRY;
    772 		break;
    773 	case LINUX_VT_GETMODE:
    774 		SCARG(&bia, com) = VT_GETMODE;
    775 		/* XXX NJWLWP */
    776 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    777 			return error;
    778 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    779 		    sizeof (struct vt_mode))))
    780 			return error;
    781 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    782 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    783 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    784 		return copyout((caddr_t)&lvt, SCARG(uap, data),
    785 		    sizeof (struct vt_mode));
    786 	case LINUX_VT_SETMODE:
    787 		com = VT_SETMODE;
    788 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    789 		    sizeof (struct vt_mode))))
    790 			return error;
    791 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    792 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    793 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    794 		sg = stackgap_init(p, 0);
    795 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    796 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    797 			return error;
    798 		SCARG(&bia, data) = bvtp;
    799 		break;
    800 	case LINUX_VT_DISALLOCATE:
    801 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    802 		return 0;
    803 	case LINUX_VT_RELDISP:
    804 		com = VT_RELDISP;
    805 		break;
    806 	case LINUX_VT_ACTIVATE:
    807 		com = VT_ACTIVATE;
    808 		break;
    809 	case LINUX_VT_WAITACTIVE:
    810 		com = VT_WAITACTIVE;
    811 		break;
    812 	case LINUX_VT_GETSTATE:
    813 		com = VT_GETSTATE;
    814 		break;
    815 	case LINUX_KDGKBTYPE:
    816 		/* This is what Linux does. */
    817 		return (subyte(SCARG(uap, data), KB_101));
    818 	case LINUX_KDGKBENT:
    819 		/*
    820 		 * The Linux KDGKBENT ioctl is different from the
    821 		 * SYSV original. So we handle it in machdep code.
    822 		 * XXX We should use keyboard mapping information
    823 		 * from wsdisplay, but this would be expensive.
    824 		 */
    825 		if ((error = copyin(SCARG(uap, data), &kbe,
    826 				    sizeof(struct kbentry))))
    827 			return (error);
    828 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    829 		    || kbe.kb_index >= NR_KEYS)
    830 			return (EINVAL);
    831 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    832 		return (copyout(&kbe, SCARG(uap, data),
    833 				sizeof(struct kbentry)));
    834 #endif
    835 	case LINUX_HDIO_GETGEO:
    836 	case LINUX_HDIO_GETGEO_BIG:
    837 		/*
    838 		 * Try to mimic Linux behaviour: return the BIOS geometry
    839 		 * if possible (extending its # of cylinders if it's beyond
    840 		 * the 1023 limit), fall back to the MI geometry (i.e.
    841 		 * the real geometry) if not found, by returning an
    842 		 * error. See common/linux_hdio.c
    843 		 */
    844 		FILE_USE(fp);
    845 		bip = fd2biosinfo(p, fp);
    846 		ioctlf = fp->f_ops->fo_ioctl;
    847 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
    848 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
    849 		FILE_UNUSE(fp, p);
    850 		if (error != 0 && error1 != 0)
    851 			return error1;
    852 		labp = error != 0 ? &label : partp.disklab;
    853 		start = error1 != 0 ? partp.part->p_offset : 0;
    854 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    855 		    && bip->bi_cyl != 0) {
    856 			heads = bip->bi_head;
    857 			sectors = bip->bi_sec;
    858 			cylinders = bip->bi_cyl;
    859 			biostotal = heads * sectors * cylinders;
    860 			realtotal = labp->d_ntracks * labp->d_nsectors *
    861 			    labp->d_ncylinders;
    862 			if (realtotal > biostotal)
    863 				cylinders = realtotal / (heads * sectors);
    864 		} else {
    865 			heads = labp->d_ntracks;
    866 			cylinders = labp->d_ncylinders;
    867 			sectors = labp->d_nsectors;
    868 		}
    869 		if (com == LINUX_HDIO_GETGEO) {
    870 			hdg.start = start;
    871 			hdg.heads = heads;
    872 			hdg.cylinders = cylinders;
    873 			hdg.sectors = sectors;
    874 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
    875 		} else {
    876 			hdg_big.start = start;
    877 			hdg_big.heads = heads;
    878 			hdg_big.cylinders = cylinders;
    879 			hdg_big.sectors = sectors;
    880 			return copyout(&hdg_big, SCARG(uap, data),
    881 			    sizeof hdg_big);
    882 		}
    883 
    884 	default:
    885 		/*
    886 		 * Unknown to us. If it's on a device, just pass it through
    887 		 * using PTIOCLINUX, the device itself might be able to
    888 		 * make some sense of it.
    889 		 * XXX hack: if the function returns EJUSTRETURN,
    890 		 * it has stuffed a sysctl return value in pt.data.
    891 		 */
    892 		FILE_USE(fp);
    893 		ioctlf = fp->f_ops->fo_ioctl;
    894 		pt.com = SCARG(uap, com);
    895 		pt.data = SCARG(uap, data);
    896 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
    897 		FILE_UNUSE(fp, p);
    898 		if (error == EJUSTRETURN) {
    899 			retval[0] = (register_t)pt.data;
    900 			error = 0;
    901 		}
    902 
    903 		if (error == ENOTTY)
    904 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
    905 			    com));
    906 		return error;
    907 	}
    908 	SCARG(&bia, com) = com;
    909 	/* XXX NJWLWP */
    910 	return sys_ioctl(curlwp, &bia, retval);
    911 }
    912 
    913 /*
    914  * Set I/O permissions for a process. Just set the maximum level
    915  * right away (ignoring the argument), otherwise we would have
    916  * to rely on I/O permission maps, which are not implemented.
    917  */
    918 int
    919 linux_sys_iopl(l, v, retval)
    920 	struct lwp *l;
    921 	void *v;
    922 	register_t *retval;
    923 {
    924 #if 0
    925 	struct linux_sys_iopl_args /* {
    926 		syscallarg(int) level;
    927 	} */ *uap = v;
    928 #endif
    929 	struct proc *p = l->l_proc;
    930 	struct trapframe *fp = l->l_md.md_regs;
    931 
    932 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    933 		return EPERM;
    934 	fp->tf_eflags |= PSL_IOPL;
    935 	*retval = 0;
    936 	return 0;
    937 }
    938 
    939 /*
    940  * See above. If a root process tries to set access to an I/O port,
    941  * just let it have the whole range.
    942  */
    943 int
    944 linux_sys_ioperm(l, v, retval)
    945 	struct lwp *l;
    946 	void *v;
    947 	register_t *retval;
    948 {
    949 	struct linux_sys_ioperm_args /* {
    950 		syscallarg(unsigned int) lo;
    951 		syscallarg(unsigned int) hi;
    952 		syscallarg(int) val;
    953 	} */ *uap = v;
    954 	struct proc *p = l->l_proc;
    955 	struct trapframe *fp = l->l_md.md_regs;
    956 
    957 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    958 		return EPERM;
    959 	if (SCARG(uap, val))
    960 		fp->tf_eflags |= PSL_IOPL;
    961 	*retval = 0;
    962 	return 0;
    963 }
    964