Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.65.2.5
      1 /*	$NetBSD: linux_machdep.c,v 1.65.2.5 2002/09/06 08:43:13 jdolecek Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.65.2.5 2002/09/06 08:43:13 jdolecek Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/map.h>
     52 #include <sys/proc.h>
     53 #include <sys/user.h>
     54 #include <sys/buf.h>
     55 #include <sys/reboot.h>
     56 #include <sys/conf.h>
     57 #include <sys/exec.h>
     58 #include <sys/file.h>
     59 #include <sys/callout.h>
     60 #include <sys/malloc.h>
     61 #include <sys/mbuf.h>
     62 #include <sys/msgbuf.h>
     63 #include <sys/mount.h>
     64 #include <sys/vnode.h>
     65 #include <sys/device.h>
     66 #include <sys/syscallargs.h>
     67 #include <sys/filedesc.h>
     68 #include <sys/exec_elf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/ioctl.h>
     71 #include <miscfs/specfs/specdev.h>
     72 
     73 #include <compat/linux/common/linux_types.h>
     74 #include <compat/linux/common/linux_signal.h>
     75 #include <compat/linux/common/linux_util.h>
     76 #include <compat/linux/common/linux_ioctl.h>
     77 #include <compat/linux/common/linux_hdio.h>
     78 #include <compat/linux/common/linux_exec.h>
     79 #include <compat/linux/common/linux_machdep.h>
     80 
     81 #include <compat/linux/linux_syscallargs.h>
     82 
     83 #include <machine/cpu.h>
     84 #include <machine/cpufunc.h>
     85 #include <machine/psl.h>
     86 #include <machine/reg.h>
     87 #include <machine/segments.h>
     88 #include <machine/specialreg.h>
     89 #include <machine/sysarch.h>
     90 #include <machine/vm86.h>
     91 #include <machine/vmparam.h>
     92 
     93 /*
     94  * To see whether wscons is configured (for virtual console ioctl calls).
     95  */
     96 #if defined(_KERNEL_OPT)
     97 #include "wsdisplay.h"
     98 #endif
     99 #if (NWSDISPLAY > 0)
    100 #include <dev/wscons/wsconsio.h>
    101 #include <dev/wscons/wsdisplay_usl_io.h>
    102 #if defined(_KERNEL_OPT)
    103 #include "opt_xserver.h"
    104 #endif
    105 #endif
    106 
    107 #ifdef USER_LDT
    108 #include <machine/cpu.h>
    109 int linux_read_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
    110     register_t *));
    111 int linux_write_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
    112     register_t *));
    113 #endif
    114 
    115 #ifdef DEBUG_LINUX
    116 #define DPRINTF(a) uprintf a
    117 #else
    118 #define DPRINTF(a)
    119 #endif
    120 
    121 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    122 extern struct disklist *i386_alldisks;
    123 extern const char *findblkname __P((int));
    124 
    125 /*
    126  * Deal with some i386-specific things in the Linux emulation code.
    127  */
    128 
    129 void
    130 linux_setregs(p, epp, stack)
    131 	struct proc *p;
    132 	struct exec_package *epp;
    133 	u_long stack;
    134 {
    135 	struct pcb *pcb = &p->p_addr->u_pcb;
    136 	struct trapframe *tf;
    137 
    138 #if NNPX > 0
    139 	/* If we were using the FPU, forget about it. */
    140 	if (npxproc == p)
    141 		npxdrop();
    142 #endif
    143 
    144 #ifdef USER_LDT
    145 	pmap_ldt_cleanup(p);
    146 #endif
    147 
    148 	p->p_md.md_flags &= ~MDP_USEDFPU;
    149 	pcb->pcb_flags = 0;
    150 
    151 	if (i386_use_fxsave) {
    152 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    153 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    154 	} else
    155 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    156 
    157 	tf = p->p_md.md_regs;
    158 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    159 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    160 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    161 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    162 	tf->tf_edi = 0;
    163 	tf->tf_esi = 0;
    164 	tf->tf_ebp = 0;
    165 	tf->tf_ebx = (int)p->p_psstr;
    166 	tf->tf_edx = 0;
    167 	tf->tf_ecx = 0;
    168 	tf->tf_eax = 0;
    169 	tf->tf_eip = epp->ep_entry;
    170 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    171 	tf->tf_eflags = PSL_USERSET;
    172 	tf->tf_esp = stack;
    173 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    174 }
    175 
    176 /*
    177  * Send an interrupt to process.
    178  *
    179  * Stack is set up to allow sigcode stored
    180  * in u. to call routine, followed by kcall
    181  * to sigreturn routine below.  After sigreturn
    182  * resets the signal mask, the stack, and the
    183  * frame pointer, it returns to the user
    184  * specified pc, psl.
    185  */
    186 
    187 void
    188 linux_sendsig(sig, mask, code)
    189 	int sig;
    190 	sigset_t *mask;
    191 	u_long code;
    192 {
    193 	struct proc *p = curproc;
    194 	struct trapframe *tf;
    195 	struct linux_sigframe *fp, frame;
    196 	int onstack;
    197 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    198 
    199 	tf = p->p_md.md_regs;
    200 
    201 	/* Do we need to jump onto the signal stack? */
    202 	onstack =
    203 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    204 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    205 
    206 	/* Allocate space for the signal handler context. */
    207 	if (onstack)
    208 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
    209 					  p->p_sigctx.ps_sigstk.ss_size);
    210 	else
    211 		fp = (struct linux_sigframe *)tf->tf_esp;
    212 	fp--;
    213 
    214 	/* Build stack frame for signal trampoline. */
    215 	frame.sf_handler = catcher;
    216 	frame.sf_sig = native_to_linux_signo[sig];
    217 
    218 	/* Save register context. */
    219 #ifdef VM86
    220 	if (tf->tf_eflags & PSL_VM) {
    221 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
    222 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
    223 		frame.sf_sc.sc_es = tf->tf_vm86_es;
    224 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
    225 		frame.sf_sc.sc_eflags = get_vflags(p);
    226 	} else
    227 #endif
    228 	{
    229 		frame.sf_sc.sc_gs = tf->tf_gs;
    230 		frame.sf_sc.sc_fs = tf->tf_fs;
    231 		frame.sf_sc.sc_es = tf->tf_es;
    232 		frame.sf_sc.sc_ds = tf->tf_ds;
    233 		frame.sf_sc.sc_eflags = tf->tf_eflags;
    234 	}
    235 	frame.sf_sc.sc_edi = tf->tf_edi;
    236 	frame.sf_sc.sc_esi = tf->tf_esi;
    237 	frame.sf_sc.sc_ebp = tf->tf_ebp;
    238 	frame.sf_sc.sc_ebx = tf->tf_ebx;
    239 	frame.sf_sc.sc_edx = tf->tf_edx;
    240 	frame.sf_sc.sc_ecx = tf->tf_ecx;
    241 	frame.sf_sc.sc_eax = tf->tf_eax;
    242 	frame.sf_sc.sc_eip = tf->tf_eip;
    243 	frame.sf_sc.sc_cs = tf->tf_cs;
    244 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
    245 	frame.sf_sc.sc_ss = tf->tf_ss;
    246 	frame.sf_sc.sc_err = tf->tf_err;
    247 	frame.sf_sc.sc_trapno = tf->tf_trapno;
    248 	frame.sf_sc.sc_cr2 = p->p_addr->u_pcb.pcb_cr2;
    249 
    250 	/* Save signal stack. */
    251 	/* Linux doesn't save the onstack flag in sigframe */
    252 
    253 	/* Save signal mask. */
    254 	native_to_linux_old_sigset(&frame.sf_sc.sc_mask, mask);
    255 
    256 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    257 		/*
    258 		 * Process has trashed its stack; give it an illegal
    259 		 * instruction to halt it in its tracks.
    260 		 */
    261 		sigexit(p, SIGILL);
    262 		/* NOTREACHED */
    263 	}
    264 
    265 	/*
    266 	 * Build context to run handler in.
    267 	 */
    268 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    269 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    270 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    271 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    272 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    273 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    274 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    275 	tf->tf_esp = (int)fp;
    276 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    277 
    278 	/* Remember that we're now on the signal stack. */
    279 	if (onstack)
    280 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    281 }
    282 
    283 /*
    284  * System call to cleanup state after a signal
    285  * has been taken.  Reset signal mask and
    286  * stack state from context left by sendsig (above).
    287  * Return to previous pc and psl as specified by
    288  * context left by sendsig. Check carefully to
    289  * make sure that the user has not modified the
    290  * psl to gain improper privileges or to cause
    291  * a machine fault.
    292  */
    293 int
    294 linux_sys_rt_sigreturn(p, v, retval)
    295 	struct proc *p;
    296 	void *v;
    297 	register_t *retval;
    298 {
    299 	/* XXX XAX write me */
    300 	return(ENOSYS);
    301 }
    302 
    303 int
    304 linux_sys_sigreturn(p, v, retval)
    305 	struct proc *p;
    306 	void *v;
    307 	register_t *retval;
    308 {
    309 	struct linux_sys_sigreturn_args /* {
    310 		syscallarg(struct linux_sigcontext *) scp;
    311 	} */ *uap = v;
    312 	struct linux_sigcontext *scp, context;
    313 	struct trapframe *tf;
    314 	sigset_t mask;
    315 	ssize_t ss_gap;
    316 
    317 	/*
    318 	 * The trampoline code hands us the context.
    319 	 * It is unsafe to keep track of it ourselves, in the event that a
    320 	 * program jumps out of a signal handler.
    321 	 */
    322 	scp = SCARG(uap, scp);
    323 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
    324 		return (EFAULT);
    325 
    326 	/* Restore register context. */
    327 	tf = p->p_md.md_regs;
    328 #ifdef VM86
    329 	if (context.sc_eflags & PSL_VM) {
    330 		tf->tf_vm86_gs = context.sc_gs;
    331 		tf->tf_vm86_fs = context.sc_fs;
    332 		tf->tf_vm86_es = context.sc_es;
    333 		tf->tf_vm86_ds = context.sc_ds;
    334 		set_vflags(p, context.sc_eflags);
    335 	} else
    336 #endif
    337 	{
    338 		/*
    339 		 * Check for security violations.  If we're returning to
    340 		 * protected mode, the CPU will validate the segment registers
    341 		 * automatically and generate a trap on violations.  We handle
    342 		 * the trap, rather than doing all of the checking here.
    343 		 */
    344 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    345 		    !USERMODE(context.sc_cs, context.sc_eflags))
    346 			return (EINVAL);
    347 
    348 		tf->tf_gs = context.sc_gs;
    349 		tf->tf_fs = context.sc_fs;
    350 		tf->tf_es = context.sc_es;
    351 		tf->tf_ds = context.sc_ds;
    352 		tf->tf_eflags = context.sc_eflags;
    353 	}
    354 	tf->tf_edi = context.sc_edi;
    355 	tf->tf_esi = context.sc_esi;
    356 	tf->tf_ebp = context.sc_ebp;
    357 	tf->tf_ebx = context.sc_ebx;
    358 	tf->tf_edx = context.sc_edx;
    359 	tf->tf_ecx = context.sc_ecx;
    360 	tf->tf_eax = context.sc_eax;
    361 	tf->tf_eip = context.sc_eip;
    362 	tf->tf_cs = context.sc_cs;
    363 	tf->tf_esp = context.sc_esp_at_signal;
    364 	tf->tf_ss = context.sc_ss;
    365 
    366 	/* Restore signal stack. */
    367 	/*
    368 	 * Linux really does it this way; it doesn't have space in sigframe
    369 	 * to save the onstack flag.
    370 	 */
    371 	ss_gap = (ssize_t)
    372 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
    373 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
    374 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    375 	else
    376 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
    377 
    378 	/* Restore signal mask. */
    379 	linux_old_to_native_sigset(&mask, &context.sc_mask);
    380 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    381 
    382 	return (EJUSTRETURN);
    383 }
    384 
    385 #ifdef USER_LDT
    386 
    387 int
    388 linux_read_ldt(p, uap, retval)
    389 	struct proc *p;
    390 	struct linux_sys_modify_ldt_args /* {
    391 		syscallarg(int) func;
    392 		syscallarg(void *) ptr;
    393 		syscallarg(size_t) bytecount;
    394 	} */ *uap;
    395 	register_t *retval;
    396 {
    397 	struct i386_get_ldt_args gl;
    398 	int error;
    399 	caddr_t sg;
    400 	char *parms;
    401 
    402 	DPRINTF(("linux_read_ldt!"));
    403 	sg = stackgap_init(p, 0);
    404 
    405 	gl.start = 0;
    406 	gl.desc = SCARG(uap, ptr);
    407 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    408 
    409 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    410 
    411 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    412 		return (error);
    413 
    414 	if ((error = i386_get_ldt(p, parms, retval)) != 0)
    415 		return (error);
    416 
    417 	*retval *= sizeof(union descriptor);
    418 	return (0);
    419 }
    420 
    421 struct linux_ldt_info {
    422 	u_int entry_number;
    423 	u_long base_addr;
    424 	u_int limit;
    425 	u_int seg_32bit:1;
    426 	u_int contents:2;
    427 	u_int read_exec_only:1;
    428 	u_int limit_in_pages:1;
    429 	u_int seg_not_present:1;
    430 	u_int useable:1;
    431 };
    432 
    433 int
    434 linux_write_ldt(p, uap, retval)
    435 	struct proc *p;
    436 	struct linux_sys_modify_ldt_args /* {
    437 		syscallarg(int) func;
    438 		syscallarg(void *) ptr;
    439 		syscallarg(size_t) bytecount;
    440 	} */ *uap;
    441 	register_t *retval;
    442 {
    443 	struct linux_ldt_info ldt_info;
    444 	struct segment_descriptor sd;
    445 	struct i386_set_ldt_args sl;
    446 	int error;
    447 	caddr_t sg;
    448 	char *parms;
    449 	int oldmode = (int)retval[0];
    450 
    451 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    452 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    453 		return (EINVAL);
    454 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    455 		return error;
    456 	if (ldt_info.entry_number >= 8192)
    457 		return (EINVAL);
    458 	if (ldt_info.contents == 3) {
    459 		if (oldmode)
    460 			return (EINVAL);
    461 		if (ldt_info.seg_not_present)
    462 			return (EINVAL);
    463 	}
    464 
    465 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    466 	    (oldmode || (ldt_info.contents == 0 &&
    467 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    468 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    469 	    ldt_info.useable == 0))) {
    470 		/* this means you should zero the ldt */
    471 		(void)memset(&sd, 0, sizeof(sd));
    472 	} else {
    473 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    474 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    475 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    476 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    477 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    478 		    (!ldt_info.read_exec_only << 1);
    479 		sd.sd_dpl = SEL_UPL;
    480 		sd.sd_p = !ldt_info.seg_not_present;
    481 		sd.sd_def32 = ldt_info.seg_32bit;
    482 		sd.sd_gran = ldt_info.limit_in_pages;
    483 		if (!oldmode)
    484 			sd.sd_xx = ldt_info.useable;
    485 		else
    486 			sd.sd_xx = 0;
    487 	}
    488 	sg = stackgap_init(p, 0);
    489 	sl.start = ldt_info.entry_number;
    490 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    491 	sl.num = 1;
    492 
    493 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    494 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    495 
    496 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    497 
    498 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    499 		return (error);
    500 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    501 		return (error);
    502 
    503 	if ((error = i386_set_ldt(p, parms, retval)) != 0)
    504 		return (error);
    505 
    506 	*retval = 0;
    507 	return (0);
    508 }
    509 
    510 #endif /* USER_LDT */
    511 
    512 int
    513 linux_sys_modify_ldt(p, v, retval)
    514 	struct proc *p;
    515 	void *v;
    516 	register_t *retval;
    517 {
    518 	struct linux_sys_modify_ldt_args /* {
    519 		syscallarg(int) func;
    520 		syscallarg(void *) ptr;
    521 		syscallarg(size_t) bytecount;
    522 	} */ *uap = v;
    523 
    524 	switch (SCARG(uap, func)) {
    525 #ifdef USER_LDT
    526 	case 0:
    527 		return linux_read_ldt(p, uap, retval);
    528 	case 1:
    529 		retval[0] = 1;
    530 		return linux_write_ldt(p, uap, retval);
    531 	case 2:
    532 #ifdef notyet
    533 		return (linux_read_default_ldt(p, uap, retval);
    534 #else
    535 		return (ENOSYS);
    536 #endif
    537 	case 0x11:
    538 		retval[0] = 0;
    539 		return linux_write_ldt(p, uap, retval);
    540 #endif /* USER_LDT */
    541 
    542 	default:
    543 		return (ENOSYS);
    544 	}
    545 }
    546 
/*
 * XXX Pathetic hack to make svgalib work. This will fake the major
 * device number of an opened VT so that svgalib likes it. grmbl.
 * Should probably do it 'wrong the right way' and use a mapping
 * array for all major device numbers, and map linux_mknod too.
 *
 * Returns `dev' unchanged unless `raw' is non-zero, wscons is
 * configured and `dev' has the wscons major; in that case the result
 * has the Linux console major and the minor number plus one.
 */
dev_t
linux_fakedev(dev, raw)
	dev_t dev;
	int raw;
{
	if (!raw)
		return dev;

#if (NWSDISPLAY > 0)
	if (major(dev) == NETBSD_WSCONS_MAJOR)
		return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
#endif

	return dev;
}
    567 
    568 #if (NWSDISPLAY > 0)
    569 /*
    570  * That's not complete, but enough to get an X server running.
    571  */
    572 #define NR_KEYS 128
    573 static const u_short plain_map[NR_KEYS] = {
    574 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
    575 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
    576 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    577 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
    578 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
    579 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
    580 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
    581 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    582 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
    583 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    584 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
    585 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    586 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    587 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    588 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    589 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    590 }, shift_map[NR_KEYS] = {
    591 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
    592 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
    593 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
    594 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
    595 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
    596 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
    597 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
    598 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
    599 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
    600 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    601 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
    602 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    603 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    604 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
    605 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    606 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    607 }, altgr_map[NR_KEYS] = {
    608 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
    609 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
    610 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    611 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
    612 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
    613 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
    614 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
    615 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
    616 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
    617 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
    618 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
    619 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    620 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    621 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    622 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    623 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    624 }, ctrl_map[NR_KEYS] = {
    625 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
    626 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
    627 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
    628 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
    629 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
    630 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
    631 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
    632 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    633 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
    634 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    635 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
    636 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    637 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    638 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    639 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    640 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    641 };
    642 
    643 const u_short * const linux_keytabs[] = {
    644 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
    645 };
    646 #endif
    647 
    648 static struct biosdisk_info *
    649 fd2biosinfo(p, fp)
    650 	struct proc *p;
    651 	struct file *fp;
    652 {
    653 	struct vnode *vp;
    654 	const char *blkname;
    655 	char diskname[16];
    656 	int i;
    657 	struct nativedisk_info *nip;
    658 	struct disklist *dl = i386_alldisks;
    659 
    660 	if (fp->f_type != DTYPE_VNODE)
    661 		return NULL;
    662 	vp = (struct vnode *)fp->f_data;
    663 
    664 	if (vp->v_type != VBLK)
    665 		return NULL;
    666 
    667 	blkname = findblkname(major(vp->v_rdev));
    668 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    669 	    DISKUNIT(vp->v_rdev));
    670 
    671 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    672 		nip = &dl->dl_nativedisks[i];
    673 		if (strcmp(diskname, nip->ni_devname))
    674 			continue;
    675 		if (nip->ni_nmatches != 0)
    676 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    677 	}
    678 
    679 	return NULL;
    680 }
    681 
    682 
    683 /*
    684  * We come here in a last attempt to satisfy a Linux ioctl() call
    685  */
    686 int
    687 linux_machdepioctl(p, v, retval)
    688 	struct proc *p;
    689 	void *v;
    690 	register_t *retval;
    691 {
    692 	struct linux_sys_ioctl_args /* {
    693 		syscallarg(int) fd;
    694 		syscallarg(u_long) com;
    695 		syscallarg(caddr_t) data;
    696 	} */ *uap = v;
    697 	struct sys_ioctl_args bia;
    698 	u_long com;
    699 	int error, error1;
    700 #if (NWSDISPLAY > 0)
    701 	struct vt_mode lvt;
    702 	caddr_t bvtp, sg;
    703 	struct kbentry kbe;
    704 #endif
    705 	struct linux_hd_geometry hdg;
    706 	struct linux_hd_big_geometry hdg_big;
    707 	struct biosdisk_info *bip;
    708 	struct filedesc *fdp;
    709 	struct file *fp;
    710 	int fd;
    711 	struct disklabel label, *labp;
    712 	struct partinfo partp;
    713 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
    714 	u_long start, biostotal, realtotal;
    715 	u_char heads, sectors;
    716 	u_int cylinders;
    717 	struct ioctl_pt pt;
    718 
    719 	fd = SCARG(uap, fd);
    720 	SCARG(&bia, fd) = fd;
    721 	SCARG(&bia, data) = SCARG(uap, data);
    722 	com = SCARG(uap, com);
    723 
    724 	fdp = p->p_fd;
    725 
    726 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    727 		return (EBADF);
    728 
    729 	switch (com) {
    730 #if (NWSDISPLAY > 0)
    731 	case LINUX_KDGKBMODE:
    732 		com = KDGKBMODE;
    733 		break;
    734 	case LINUX_KDSKBMODE:
    735 		com = KDSKBMODE;
    736 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    737 			SCARG(&bia, data) = (caddr_t)K_RAW;
    738 		break;
    739 	case LINUX_KIOCSOUND:
    740 		SCARG(&bia, data) =
    741 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    742 		/* fall through */
    743 	case LINUX_KDMKTONE:
    744 		com = KDMKTONE;
    745 		break;
    746 	case LINUX_KDSETMODE:
    747 		com = KDSETMODE;
    748 		break;
    749 	case LINUX_KDGETMODE:
    750 		/* KD_* values are equal to the wscons numbers */
    751 		com = WSDISPLAYIO_GMODE;
    752 		break;
    753 	case LINUX_KDENABIO:
    754 		com = KDENABIO;
    755 		break;
    756 	case LINUX_KDDISABIO:
    757 		com = KDDISABIO;
    758 		break;
    759 	case LINUX_KDGETLED:
    760 		com = KDGETLED;
    761 		break;
    762 	case LINUX_KDSETLED:
    763 		com = KDSETLED;
    764 		break;
    765 	case LINUX_VT_OPENQRY:
    766 		com = VT_OPENQRY;
    767 		break;
    768 	case LINUX_VT_GETMODE:
    769 		SCARG(&bia, com) = VT_GETMODE;
    770 		if ((error = sys_ioctl(p, &bia, retval)))
    771 			return error;
    772 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    773 		    sizeof (struct vt_mode))))
    774 			return error;
    775 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    776 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    777 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    778 		return copyout((caddr_t)&lvt, SCARG(uap, data),
    779 		    sizeof (struct vt_mode));
    780 	case LINUX_VT_SETMODE:
    781 		com = VT_SETMODE;
    782 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    783 		    sizeof (struct vt_mode))))
    784 			return error;
    785 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    786 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    787 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    788 		sg = stackgap_init(p, 0);
    789 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    790 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    791 			return error;
    792 		SCARG(&bia, data) = bvtp;
    793 		break;
    794 	case LINUX_VT_DISALLOCATE:
    795 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    796 		return 0;
    797 	case LINUX_VT_RELDISP:
    798 		com = VT_RELDISP;
    799 		break;
    800 	case LINUX_VT_ACTIVATE:
    801 		com = VT_ACTIVATE;
    802 		break;
    803 	case LINUX_VT_WAITACTIVE:
    804 		com = VT_WAITACTIVE;
    805 		break;
    806 	case LINUX_VT_GETSTATE:
    807 		com = VT_GETSTATE;
    808 		break;
    809 	case LINUX_KDGKBTYPE:
    810 		/* This is what Linux does. */
    811 		return (subyte(SCARG(uap, data), KB_101));
    812 	case LINUX_KDGKBENT:
    813 		/*
    814 		 * The Linux KDGKBENT ioctl is different from the
    815 		 * SYSV original. So we handle it in machdep code.
    816 		 * XXX We should use keyboard mapping information
    817 		 * from wsdisplay, but this would be expensive.
    818 		 */
    819 		if ((error = copyin(SCARG(uap, data), &kbe,
    820 				    sizeof(struct kbentry))))
    821 			return (error);
    822 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    823 		    || kbe.kb_index >= NR_KEYS)
    824 			return (EINVAL);
    825 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    826 		return (copyout(&kbe, SCARG(uap, data),
    827 				sizeof(struct kbentry)));
    828 #endif
    829 	case LINUX_HDIO_GETGEO:
    830 	case LINUX_HDIO_GETGEO_BIG:
    831 		/*
    832 		 * Try to mimic Linux behaviour: return the BIOS geometry
    833 		 * if possible (extending its # of cylinders if it's beyond
    834 		 * the 1023 limit), fall back to the MI geometry (i.e.
    835 		 * the real geometry) if not found, by returning an
    836 		 * error. See common/linux_hdio.c
    837 		 */
    838 		FILE_USE(fp);
    839 		bip = fd2biosinfo(p, fp);
    840 		ioctlf = fp->f_ops->fo_ioctl;
    841 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
    842 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
    843 		FILE_UNUSE(fp, p);
    844 		if (error != 0 && error1 != 0)
    845 			return error1;
    846 		labp = error != 0 ? &label : partp.disklab;
    847 		start = error1 != 0 ? partp.part->p_offset : 0;
    848 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    849 		    && bip->bi_cyl != 0) {
    850 			heads = bip->bi_head;
    851 			sectors = bip->bi_sec;
    852 			cylinders = bip->bi_cyl;
    853 			biostotal = heads * sectors * cylinders;
    854 			realtotal = labp->d_ntracks * labp->d_nsectors *
    855 			    labp->d_ncylinders;
    856 			if (realtotal > biostotal)
    857 				cylinders = realtotal / (heads * sectors);
    858 		} else {
    859 			heads = labp->d_ntracks;
    860 			cylinders = labp->d_ncylinders;
    861 			sectors = labp->d_nsectors;
    862 		}
    863 		if (com == LINUX_HDIO_GETGEO) {
    864 			hdg.start = start;
    865 			hdg.heads = heads;
    866 			hdg.cylinders = cylinders;
    867 			hdg.sectors = sectors;
    868 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
    869 		} else {
    870 			hdg_big.start = start;
    871 			hdg_big.heads = heads;
    872 			hdg_big.cylinders = cylinders;
    873 			hdg_big.sectors = sectors;
    874 			return copyout(&hdg_big, SCARG(uap, data),
    875 			    sizeof hdg_big);
    876 		}
    877 
    878 	default:
    879 		/*
    880 		 * Unknown to us. If it's on a device, just pass it through
    881 		 * using PTIOCLINUX, the device itself might be able to
    882 		 * make some sense of it.
    883 		 * XXX hack: if the function returns EJUSTRETURN,
    884 		 * it has stuffed a sysctl return value in pt.data.
    885 		 */
    886 		FILE_USE(fp);
    887 		ioctlf = fp->f_ops->fo_ioctl;
    888 		pt.com = SCARG(uap, com);
    889 		pt.data = SCARG(uap, data);
    890 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
    891 		FILE_UNUSE(fp, p);
    892 		if (error == EJUSTRETURN) {
    893 			retval[0] = (register_t)pt.data;
    894 			error = 0;
    895 		}
    896 
    897 		if (error == ENOTTY)
    898 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
    899 			    com));
    900 		return error;
    901 	}
    902 	SCARG(&bia, com) = com;
    903 	return sys_ioctl(p, &bia, retval);
    904 }
    905 
    906 /*
    907  * Set I/O permissions for a process. Just set the maximum level
    908  * right away (ignoring the argument), otherwise we would have
    909  * to rely on I/O permission maps, which are not implemented.
    910  */
    911 int
    912 linux_sys_iopl(p, v, retval)
    913 	struct proc *p;
    914 	void *v;
    915 	register_t *retval;
    916 {
    917 #if 0
    918 	struct linux_sys_iopl_args /* {
    919 		syscallarg(int) level;
    920 	} */ *uap = v;
    921 #endif
    922 	struct trapframe *fp = p->p_md.md_regs;
    923 
    924 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    925 		return EPERM;
    926 	fp->tf_eflags |= PSL_IOPL;
    927 	*retval = 0;
    928 	return 0;
    929 }
    930 
    931 /*
    932  * See above. If a root process tries to set access to an I/O port,
    933  * just let it have the whole range.
    934  */
    935 int
    936 linux_sys_ioperm(p, v, retval)
    937 	struct proc *p;
    938 	void *v;
    939 	register_t *retval;
    940 {
    941 	struct linux_sys_ioperm_args /* {
    942 		syscallarg(unsigned int) lo;
    943 		syscallarg(unsigned int) hi;
    944 		syscallarg(int) val;
    945 	} */ *uap = v;
    946 	struct trapframe *fp = p->p_md.md_regs;
    947 
    948 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    949 		return EPERM;
    950 	if (SCARG(uap, val))
    951 		fp->tf_eflags |= PSL_IOPL;
    952 	*retval = 0;
    953 	return 0;
    954 }
    955