Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.76.2.1
      1 /*	$NetBSD: linux_machdep.c,v 1.76.2.1 2002/05/16 04:27:03 gehenna Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.76.2.1 2002/05/16 04:27:03 gehenna Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/map.h>
     52 #include <sys/proc.h>
     53 #include <sys/user.h>
     54 #include <sys/buf.h>
     55 #include <sys/reboot.h>
     56 #include <sys/conf.h>
     57 #include <sys/exec.h>
     58 #include <sys/file.h>
     59 #include <sys/callout.h>
     60 #include <sys/malloc.h>
     61 #include <sys/mbuf.h>
     62 #include <sys/msgbuf.h>
     63 #include <sys/mount.h>
     64 #include <sys/vnode.h>
     65 #include <sys/device.h>
     66 #include <sys/syscallargs.h>
     67 #include <sys/filedesc.h>
     68 #include <sys/exec_elf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/ioctl.h>
     71 #include <miscfs/specfs/specdev.h>
     72 
     73 #include <compat/linux/common/linux_types.h>
     74 #include <compat/linux/common/linux_signal.h>
     75 #include <compat/linux/common/linux_util.h>
     76 #include <compat/linux/common/linux_ioctl.h>
     77 #include <compat/linux/common/linux_hdio.h>
     78 #include <compat/linux/common/linux_exec.h>
     79 #include <compat/linux/common/linux_machdep.h>
     80 
     81 #include <compat/linux/linux_syscallargs.h>
     82 
     83 #include <machine/cpu.h>
     84 #include <machine/cpufunc.h>
     85 #include <machine/psl.h>
     86 #include <machine/reg.h>
     87 #include <machine/segments.h>
     88 #include <machine/specialreg.h>
     89 #include <machine/sysarch.h>
     90 #include <machine/vm86.h>
     91 #include <machine/vmparam.h>
     92 
     93 /*
     94  * To see whether wscons is configured (for virtual console ioctl calls).
     95  */
     96 #if defined(_KERNEL_OPT)
     97 #include "wsdisplay.h"
     98 #endif
     99 #if (NWSDISPLAY > 0)
    100 #include <dev/wscons/wsconsio.h>
    101 #include <dev/wscons/wsdisplay_usl_io.h>
    102 #if defined(_KERNEL_OPT)
    103 #include "opt_xserver.h"
    104 #endif
    105 #endif
    106 
#ifdef USER_LDT
#include <machine/cpu.h>
/*
 * Workers behind linux_sys_modify_ldt(); they are only declared (and
 * defined further down) when the kernel is built with USER_LDT.
 */
int linux_read_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
    register_t *));
int linux_write_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
    register_t *));
#endif

/*
 * Debug output.  The argument is a complete, parenthesized printf-style
 * argument list, e.g. DPRINTF(("x=%d\n", x)).  It is handed to uprintf
 * when DEBUG_LINUX is defined and expands to nothing otherwise.
 */
#ifdef DEBUG_LINUX
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif

/* Maps an open file to BIOS disk information; defined later in this file. */
static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
/* Disk list consulted by fd2biosinfo(); defined outside this file. */
extern struct disklist *i386_alldisks;
    123 
    124 /*
    125  * Deal with some i386-specific things in the Linux emulation code.
    126  */
    127 
    128 void
    129 linux_setregs(p, epp, stack)
    130 	struct proc *p;
    131 	struct exec_package *epp;
    132 	u_long stack;
    133 {
    134 	struct pcb *pcb = &p->p_addr->u_pcb;
    135 	struct trapframe *tf;
    136 
    137 #if NNPX > 0
    138 	/* If we were using the FPU, forget about it. */
    139 	if (npxproc == p)
    140 		npxdrop();
    141 #endif
    142 
    143 #ifdef USER_LDT
    144 	pmap_ldt_cleanup(p);
    145 #endif
    146 
    147 	p->p_md.md_flags &= ~MDP_USEDFPU;
    148 	pcb->pcb_flags = 0;
    149 
    150 	if (i386_use_fxsave) {
    151 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    152 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    153 	} else
    154 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    155 
    156 	tf = p->p_md.md_regs;
    157 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    158 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    159 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    160 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    161 	tf->tf_edi = 0;
    162 	tf->tf_esi = 0;
    163 	tf->tf_ebp = 0;
    164 	tf->tf_ebx = (int)p->p_psstr;
    165 	tf->tf_edx = 0;
    166 	tf->tf_ecx = 0;
    167 	tf->tf_eax = 0;
    168 	tf->tf_eip = epp->ep_entry;
    169 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    170 	tf->tf_eflags = PSL_USERSET;
    171 	tf->tf_esp = stack;
    172 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    173 }
    174 
    175 /*
    176  * Send an interrupt to process.
    177  *
    178  * Stack is set up to allow sigcode stored
    179  * in u. to call routine, followed by kcall
    180  * to sigreturn routine below.  After sigreturn
    181  * resets the signal mask, the stack, and the
    182  * frame pointer, it returns to the user
    183  * specified pc, psl.
    184  */
    185 
    186 void
    187 linux_sendsig(catcher, sig, mask, code)
    188 	sig_t catcher;
    189 	int sig;
    190 	sigset_t *mask;
    191 	u_long code;
    192 {
    193 	struct proc *p = curproc;
    194 	struct trapframe *tf;
    195 	struct linux_sigframe *fp, frame;
    196 	int onstack;
    197 
    198 	tf = p->p_md.md_regs;
    199 
    200 	/* Do we need to jump onto the signal stack? */
    201 	onstack =
    202 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    203 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    204 
    205 	/* Allocate space for the signal handler context. */
    206 	if (onstack)
    207 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
    208 					  p->p_sigctx.ps_sigstk.ss_size);
    209 	else
    210 		fp = (struct linux_sigframe *)tf->tf_esp;
    211 	fp--;
    212 
    213 	/* Build stack frame for signal trampoline. */
    214 	frame.sf_handler = catcher;
    215 	frame.sf_sig = native_to_linux_signo[sig];
    216 
    217 	/* Save register context. */
    218 #ifdef VM86
    219 	if (tf->tf_eflags & PSL_VM) {
    220 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
    221 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
    222 		frame.sf_sc.sc_es = tf->tf_vm86_es;
    223 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
    224 		frame.sf_sc.sc_eflags = get_vflags(p);
    225 	} else
    226 #endif
    227 	{
    228 		frame.sf_sc.sc_gs = tf->tf_gs;
    229 		frame.sf_sc.sc_fs = tf->tf_fs;
    230 		frame.sf_sc.sc_es = tf->tf_es;
    231 		frame.sf_sc.sc_ds = tf->tf_ds;
    232 		frame.sf_sc.sc_eflags = tf->tf_eflags;
    233 	}
    234 	frame.sf_sc.sc_edi = tf->tf_edi;
    235 	frame.sf_sc.sc_esi = tf->tf_esi;
    236 	frame.sf_sc.sc_ebp = tf->tf_ebp;
    237 	frame.sf_sc.sc_ebx = tf->tf_ebx;
    238 	frame.sf_sc.sc_edx = tf->tf_edx;
    239 	frame.sf_sc.sc_ecx = tf->tf_ecx;
    240 	frame.sf_sc.sc_eax = tf->tf_eax;
    241 	frame.sf_sc.sc_eip = tf->tf_eip;
    242 	frame.sf_sc.sc_cs = tf->tf_cs;
    243 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
    244 	frame.sf_sc.sc_ss = tf->tf_ss;
    245 	frame.sf_sc.sc_err = tf->tf_err;
    246 	frame.sf_sc.sc_trapno = tf->tf_trapno;
    247 	frame.sf_sc.sc_cr2 = p->p_addr->u_pcb.pcb_cr2;
    248 
    249 	/* Save signal stack. */
    250 	/* Linux doesn't save the onstack flag in sigframe */
    251 
    252 	/* Save signal mask. */
    253 	native_to_linux_old_sigset(&frame.sf_sc.sc_mask, mask);
    254 
    255 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    256 		/*
    257 		 * Process has trashed its stack; give it an illegal
    258 		 * instruction to halt it in its tracks.
    259 		 */
    260 		sigexit(p, SIGILL);
    261 		/* NOTREACHED */
    262 	}
    263 
    264 	/*
    265 	 * Build context to run handler in.
    266 	 */
    267 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    268 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    269 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    270 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    271 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    272 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    273 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    274 	tf->tf_esp = (int)fp;
    275 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    276 
    277 	/* Remember that we're now on the signal stack. */
    278 	if (onstack)
    279 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    280 }
    281 
    282 /*
    283  * System call to cleanup state after a signal
    284  * has been taken.  Reset signal mask and
    285  * stack state from context left by sendsig (above).
    286  * Return to previous pc and psl as specified by
    287  * context left by sendsig. Check carefully to
    288  * make sure that the user has not modified the
    289  * psl to gain improper privileges or to cause
    290  * a machine fault.
    291  */
    292 int
    293 linux_sys_rt_sigreturn(p, v, retval)
    294 	struct proc *p;
    295 	void *v;
    296 	register_t *retval;
    297 {
    298 	/* XXX XAX write me */
    299 	return(ENOSYS);
    300 }
    301 
    302 int
    303 linux_sys_sigreturn(p, v, retval)
    304 	struct proc *p;
    305 	void *v;
    306 	register_t *retval;
    307 {
    308 	struct linux_sys_sigreturn_args /* {
    309 		syscallarg(struct linux_sigcontext *) scp;
    310 	} */ *uap = v;
    311 	struct linux_sigcontext *scp, context;
    312 	struct trapframe *tf;
    313 	sigset_t mask;
    314 	ssize_t ss_gap;
    315 
    316 	/*
    317 	 * The trampoline code hands us the context.
    318 	 * It is unsafe to keep track of it ourselves, in the event that a
    319 	 * program jumps out of a signal handler.
    320 	 */
    321 	scp = SCARG(uap, scp);
    322 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
    323 		return (EFAULT);
    324 
    325 	/* Restore register context. */
    326 	tf = p->p_md.md_regs;
    327 #ifdef VM86
    328 	if (context.sc_eflags & PSL_VM) {
    329 		tf->tf_vm86_gs = context.sc_gs;
    330 		tf->tf_vm86_fs = context.sc_fs;
    331 		tf->tf_vm86_es = context.sc_es;
    332 		tf->tf_vm86_ds = context.sc_ds;
    333 		set_vflags(p, context.sc_eflags);
    334 	} else
    335 #endif
    336 	{
    337 		/*
    338 		 * Check for security violations.  If we're returning to
    339 		 * protected mode, the CPU will validate the segment registers
    340 		 * automatically and generate a trap on violations.  We handle
    341 		 * the trap, rather than doing all of the checking here.
    342 		 */
    343 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    344 		    !USERMODE(context.sc_cs, context.sc_eflags))
    345 			return (EINVAL);
    346 
    347 		tf->tf_gs = context.sc_gs;
    348 		tf->tf_fs = context.sc_fs;
    349 		tf->tf_es = context.sc_es;
    350 		tf->tf_ds = context.sc_ds;
    351 		tf->tf_eflags = context.sc_eflags;
    352 	}
    353 	tf->tf_edi = context.sc_edi;
    354 	tf->tf_esi = context.sc_esi;
    355 	tf->tf_ebp = context.sc_ebp;
    356 	tf->tf_ebx = context.sc_ebx;
    357 	tf->tf_edx = context.sc_edx;
    358 	tf->tf_ecx = context.sc_ecx;
    359 	tf->tf_eax = context.sc_eax;
    360 	tf->tf_eip = context.sc_eip;
    361 	tf->tf_cs = context.sc_cs;
    362 	tf->tf_esp = context.sc_esp_at_signal;
    363 	tf->tf_ss = context.sc_ss;
    364 
    365 	/* Restore signal stack. */
    366 	/*
    367 	 * Linux really does it this way; it doesn't have space in sigframe
    368 	 * to save the onstack flag.
    369 	 */
    370 	ss_gap = (ssize_t)
    371 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
    372 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
    373 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    374 	else
    375 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
    376 
    377 	/* Restore signal mask. */
    378 	linux_old_to_native_sigset(&mask, &context.sc_mask);
    379 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    380 
    381 	return (EJUSTRETURN);
    382 }
    383 
    384 #ifdef USER_LDT
    385 
    386 int
    387 linux_read_ldt(p, uap, retval)
    388 	struct proc *p;
    389 	struct linux_sys_modify_ldt_args /* {
    390 		syscallarg(int) func;
    391 		syscallarg(void *) ptr;
    392 		syscallarg(size_t) bytecount;
    393 	} */ *uap;
    394 	register_t *retval;
    395 {
    396 	struct i386_get_ldt_args gl;
    397 	int error;
    398 	caddr_t sg;
    399 	char *parms;
    400 
    401 	DPRINTF(("linux_read_ldt!"));
    402 	sg = stackgap_init(p, 0);
    403 
    404 	gl.start = 0;
    405 	gl.desc = SCARG(uap, ptr);
    406 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    407 
    408 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    409 
    410 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    411 		return (error);
    412 
    413 	if ((error = i386_get_ldt(p, parms, retval)) != 0)
    414 		return (error);
    415 
    416 	*retval *= sizeof(union descriptor);
    417 	return (0);
    418 }
    419 
/*
 * LDT entry description copied in from the user buffer by
 * linux_write_ldt().  The notes below describe how that function
 * consumes each member.
 */
struct linux_ldt_info {
	u_int entry_number;		/* LDT slot to write; must be < 8192 */
	u_long base_addr;		/* split into sd_lobase / sd_hibase */
	u_int limit;			/* split into sd_lolimit / sd_hilimit */
	u_int seg_32bit:1;		/* copied to sd_def32 */
	u_int contents:2;		/* shifted into sd_type; 3 is restricted */
	u_int read_exec_only:1;		/* inverted into bit 1 of sd_type */
	u_int limit_in_pages:1;		/* copied to sd_gran */
	u_int seg_not_present:1;	/* inverted into sd_p */
	u_int useable:1;		/* copied to sd_xx unless in old mode */
};
    431 
    432 int
    433 linux_write_ldt(p, uap, retval)
    434 	struct proc *p;
    435 	struct linux_sys_modify_ldt_args /* {
    436 		syscallarg(int) func;
    437 		syscallarg(void *) ptr;
    438 		syscallarg(size_t) bytecount;
    439 	} */ *uap;
    440 	register_t *retval;
    441 {
    442 	struct linux_ldt_info ldt_info;
    443 	struct segment_descriptor sd;
    444 	struct i386_set_ldt_args sl;
    445 	int error;
    446 	caddr_t sg;
    447 	char *parms;
    448 	int oldmode = (int)retval[0];
    449 
    450 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    451 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    452 		return (EINVAL);
    453 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    454 		return error;
    455 	if (ldt_info.entry_number >= 8192)
    456 		return (EINVAL);
    457 	if (ldt_info.contents == 3) {
    458 		if (oldmode)
    459 			return (EINVAL);
    460 		if (ldt_info.seg_not_present)
    461 			return (EINVAL);
    462 	}
    463 
    464 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    465 	    (oldmode || (ldt_info.contents == 0 &&
    466 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    467 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    468 	    ldt_info.useable == 0))) {
    469 		/* this means you should zero the ldt */
    470 		(void)memset(&sd, 0, sizeof(sd));
    471 	} else {
    472 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    473 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    474 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    475 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    476 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    477 		    (!ldt_info.read_exec_only << 1);
    478 		sd.sd_dpl = SEL_UPL;
    479 		sd.sd_p = !ldt_info.seg_not_present;
    480 		sd.sd_def32 = ldt_info.seg_32bit;
    481 		sd.sd_gran = ldt_info.limit_in_pages;
    482 		if (!oldmode)
    483 			sd.sd_xx = ldt_info.useable;
    484 		else
    485 			sd.sd_xx = 0;
    486 	}
    487 	sg = stackgap_init(p, 0);
    488 	sl.start = ldt_info.entry_number;
    489 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    490 	sl.num = 1;
    491 
    492 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    493 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    494 
    495 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    496 
    497 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    498 		return (error);
    499 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    500 		return (error);
    501 
    502 	if ((error = i386_set_ldt(p, parms, retval)) != 0)
    503 		return (error);
    504 
    505 	*retval = 0;
    506 	return (0);
    507 }
    508 
    509 #endif /* USER_LDT */
    510 
    511 int
    512 linux_sys_modify_ldt(p, v, retval)
    513 	struct proc *p;
    514 	void *v;
    515 	register_t *retval;
    516 {
    517 	struct linux_sys_modify_ldt_args /* {
    518 		syscallarg(int) func;
    519 		syscallarg(void *) ptr;
    520 		syscallarg(size_t) bytecount;
    521 	} */ *uap = v;
    522 
    523 	switch (SCARG(uap, func)) {
    524 #ifdef USER_LDT
    525 	case 0:
    526 		return linux_read_ldt(p, uap, retval);
    527 	case 1:
    528 		retval[0] = 1;
    529 		return linux_write_ldt(p, uap, retval);
    530 	case 2:
    531 #ifdef notyet
    532 		return (linux_read_default_ldt(p, uap, retval);
    533 #else
    534 		return (ENOSYS);
    535 #endif
    536 	case 0x11:
    537 		retval[0] = 0;
    538 		return linux_write_ldt(p, uap, retval);
    539 #endif /* USER_LDT */
    540 
    541 	default:
    542 		return (ENOSYS);
    543 	}
    544 }
    545 
    546 /*
    547  * XXX Pathetic hack to make svgalib work. This will fake the major
    548  * device number of an opened VT so that svgalib likes it. grmbl.
    549  * Should probably do it 'wrong the right way' and use a mapping
    550  * array for all major device numbers, and map linux_mknod too.
    551  */
    552 dev_t
    553 linux_fakedev(dev, raw)
    554 	dev_t dev;
    555 	int raw;
    556 {
    557 	if (raw) {
    558 #if (NWSDISPLAY > 0)
    559 		extern const struct cdevsw wsdisplay_cdevsw;
    560 		if (cdevsw_lookup(dev) == &wsdisplay_cdevsw)
    561 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    562 #endif
    563 		return 0;
    564 	} else {
    565 		return dev;
    566 	}
    567 }
    568 
    569 #if (NWSDISPLAY > 0)
/*
 * That's not complete, but enough to get an X server running.
 *
 * Fixed key tables served by the LINUX_KDGKBENT ioctl in
 * linux_machdepioctl(): kb_table selects a row of linux_keytabs[] and
 * kb_index (below NR_KEYS) selects the entry within that map.
 * NOTE(review): the 16-bit values look like Linux keymap entries (type
 * in the high byte, value in the low byte) -- confirm against the Linux
 * keyboard headers.
 */
#define NR_KEYS 128
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/*
 * Table selector for LINUX_KDGKBENT.  altgr_map is deliberately listed
 * twice, so table indices 2 and 3 return the same data.
 * NOTE(review): presumably index 3 is the shift+altgr table in Linux
 * and simply reuses the altgr map here -- confirm.
 */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
    647 #endif
    648 
    649 static struct biosdisk_info *
    650 fd2biosinfo(p, fp)
    651 	struct proc *p;
    652 	struct file *fp;
    653 {
    654 	struct vnode *vp;
    655 	const char *blkname;
    656 	char diskname[16];
    657 	int i;
    658 	struct nativedisk_info *nip;
    659 	struct disklist *dl = i386_alldisks;
    660 
    661 	if (fp->f_type != DTYPE_VNODE)
    662 		return NULL;
    663 	vp = (struct vnode *)fp->f_data;
    664 
    665 	if (vp->v_type != VBLK)
    666 		return NULL;
    667 
    668 	blkname = devsw_blk2name(major(vp->v_rdev));
    669 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    670 	    DISKUNIT(vp->v_rdev));
    671 
    672 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    673 		nip = &dl->dl_nativedisks[i];
    674 		if (strcmp(diskname, nip->ni_devname))
    675 			continue;
    676 		if (nip->ni_nmatches != 0)
    677 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    678 	}
    679 
    680 	return NULL;
    681 }
    682 
    683 
    684 /*
    685  * We come here in a last attempt to satisfy a Linux ioctl() call
    686  */
    687 int
    688 linux_machdepioctl(p, v, retval)
    689 	struct proc *p;
    690 	void *v;
    691 	register_t *retval;
    692 {
    693 	struct linux_sys_ioctl_args /* {
    694 		syscallarg(int) fd;
    695 		syscallarg(u_long) com;
    696 		syscallarg(caddr_t) data;
    697 	} */ *uap = v;
    698 	struct sys_ioctl_args bia;
    699 	u_long com;
    700 	int error, error1;
    701 #if (NWSDISPLAY > 0)
    702 	struct vt_mode lvt;
    703 	caddr_t bvtp, sg;
    704 	struct kbentry kbe;
    705 #endif
    706 	struct linux_hd_geometry hdg;
    707 	struct linux_hd_big_geometry hdg_big;
    708 	struct biosdisk_info *bip;
    709 	struct filedesc *fdp;
    710 	struct file *fp;
    711 	int fd;
    712 	struct disklabel label, *labp;
    713 	struct partinfo partp;
    714 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
    715 	u_long start, biostotal, realtotal;
    716 	u_char heads, sectors;
    717 	u_int cylinders;
    718 	struct ioctl_pt pt;
    719 
    720 	fd = SCARG(uap, fd);
    721 	SCARG(&bia, fd) = fd;
    722 	SCARG(&bia, data) = SCARG(uap, data);
    723 	com = SCARG(uap, com);
    724 
    725 	fdp = p->p_fd;
    726 
    727 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    728 		return (EBADF);
    729 
    730 	switch (com) {
    731 #if (NWSDISPLAY > 0)
    732 	case LINUX_KDGKBMODE:
    733 		com = KDGKBMODE;
    734 		break;
    735 	case LINUX_KDSKBMODE:
    736 		com = KDSKBMODE;
    737 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    738 			SCARG(&bia, data) = (caddr_t)K_RAW;
    739 		break;
    740 	case LINUX_KIOCSOUND:
    741 		SCARG(&bia, data) =
    742 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    743 		/* fall through */
    744 	case LINUX_KDMKTONE:
    745 		com = KDMKTONE;
    746 		break;
    747 	case LINUX_KDSETMODE:
    748 		com = KDSETMODE;
    749 		break;
    750 	case LINUX_KDGETMODE:
    751 		/* KD_* values are equal to the wscons numbers */
    752 		com = WSDISPLAYIO_GMODE;
    753 		break;
    754 	case LINUX_KDENABIO:
    755 		com = KDENABIO;
    756 		break;
    757 	case LINUX_KDDISABIO:
    758 		com = KDDISABIO;
    759 		break;
    760 	case LINUX_KDGETLED:
    761 		com = KDGETLED;
    762 		break;
    763 	case LINUX_KDSETLED:
    764 		com = KDSETLED;
    765 		break;
    766 	case LINUX_VT_OPENQRY:
    767 		com = VT_OPENQRY;
    768 		break;
    769 	case LINUX_VT_GETMODE:
    770 		SCARG(&bia, com) = VT_GETMODE;
    771 		if ((error = sys_ioctl(p, &bia, retval)))
    772 			return error;
    773 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    774 		    sizeof (struct vt_mode))))
    775 			return error;
    776 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    777 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    778 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    779 		return copyout((caddr_t)&lvt, SCARG(uap, data),
    780 		    sizeof (struct vt_mode));
    781 	case LINUX_VT_SETMODE:
    782 		com = VT_SETMODE;
    783 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    784 		    sizeof (struct vt_mode))))
    785 			return error;
    786 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    787 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    788 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    789 		sg = stackgap_init(p, 0);
    790 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    791 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    792 			return error;
    793 		SCARG(&bia, data) = bvtp;
    794 		break;
    795 	case LINUX_VT_DISALLOCATE:
    796 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    797 		return 0;
    798 	case LINUX_VT_RELDISP:
    799 		com = VT_RELDISP;
    800 		break;
    801 	case LINUX_VT_ACTIVATE:
    802 		com = VT_ACTIVATE;
    803 		break;
    804 	case LINUX_VT_WAITACTIVE:
    805 		com = VT_WAITACTIVE;
    806 		break;
    807 	case LINUX_VT_GETSTATE:
    808 		com = VT_GETSTATE;
    809 		break;
    810 	case LINUX_KDGKBTYPE:
    811 		/* This is what Linux does. */
    812 		return (subyte(SCARG(uap, data), KB_101));
    813 	case LINUX_KDGKBENT:
    814 		/*
    815 		 * The Linux KDGKBENT ioctl is different from the
    816 		 * SYSV original. So we handle it in machdep code.
    817 		 * XXX We should use keyboard mapping information
    818 		 * from wsdisplay, but this would be expensive.
    819 		 */
    820 		if ((error = copyin(SCARG(uap, data), &kbe,
    821 				    sizeof(struct kbentry))))
    822 			return (error);
    823 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    824 		    || kbe.kb_index >= NR_KEYS)
    825 			return (EINVAL);
    826 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    827 		return (copyout(&kbe, SCARG(uap, data),
    828 				sizeof(struct kbentry)));
    829 #endif
    830 	case LINUX_HDIO_GETGEO:
    831 	case LINUX_HDIO_GETGEO_BIG:
    832 		/*
    833 		 * Try to mimic Linux behaviour: return the BIOS geometry
    834 		 * if possible (extending its # of cylinders if it's beyond
    835 		 * the 1023 limit), fall back to the MI geometry (i.e.
    836 		 * the real geometry) if not found, by returning an
    837 		 * error. See common/linux_hdio.c
    838 		 */
    839 		FILE_USE(fp);
    840 		bip = fd2biosinfo(p, fp);
    841 		ioctlf = fp->f_ops->fo_ioctl;
    842 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
    843 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
    844 		FILE_UNUSE(fp, p);
    845 		if (error != 0 && error1 != 0)
    846 			return error1;
    847 		labp = error != 0 ? &label : partp.disklab;
    848 		start = error1 != 0 ? partp.part->p_offset : 0;
    849 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    850 		    && bip->bi_cyl != 0) {
    851 			heads = bip->bi_head;
    852 			sectors = bip->bi_sec;
    853 			cylinders = bip->bi_cyl;
    854 			biostotal = heads * sectors * cylinders;
    855 			realtotal = labp->d_ntracks * labp->d_nsectors *
    856 			    labp->d_ncylinders;
    857 			if (realtotal > biostotal)
    858 				cylinders = realtotal / (heads * sectors);
    859 		} else {
    860 			heads = labp->d_ntracks;
    861 			cylinders = labp->d_ncylinders;
    862 			sectors = labp->d_nsectors;
    863 		}
    864 		if (com == LINUX_HDIO_GETGEO) {
    865 			hdg.start = start;
    866 			hdg.heads = heads;
    867 			hdg.cylinders = cylinders;
    868 			hdg.sectors = sectors;
    869 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
    870 		} else {
    871 			hdg_big.start = start;
    872 			hdg_big.heads = heads;
    873 			hdg_big.cylinders = cylinders;
    874 			hdg_big.sectors = sectors;
    875 			return copyout(&hdg_big, SCARG(uap, data),
    876 			    sizeof hdg_big);
    877 		}
    878 
    879 	default:
    880 		/*
    881 		 * Unknown to us. If it's on a device, just pass it through
    882 		 * using PTIOCLINUX, the device itself might be able to
    883 		 * make some sense of it.
    884 		 * XXX hack: if the function returns EJUSTRETURN,
    885 		 * it has stuffed a sysctl return value in pt.data.
    886 		 */
    887 		FILE_USE(fp);
    888 		ioctlf = fp->f_ops->fo_ioctl;
    889 		pt.com = SCARG(uap, com);
    890 		pt.data = SCARG(uap, data);
    891 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
    892 		FILE_UNUSE(fp, p);
    893 		if (error == EJUSTRETURN) {
    894 			retval[0] = (register_t)pt.data;
    895 			error = 0;
    896 		}
    897 
    898 		if (error == ENOTTY)
    899 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
    900 			    com));
    901 		return error;
    902 	}
    903 	SCARG(&bia, com) = com;
    904 	return sys_ioctl(p, &bia, retval);
    905 }
    906 
    907 /*
    908  * Set I/O permissions for a process. Just set the maximum level
    909  * right away (ignoring the argument), otherwise we would have
    910  * to rely on I/O permission maps, which are not implemented.
    911  */
    912 int
    913 linux_sys_iopl(p, v, retval)
    914 	struct proc *p;
    915 	void *v;
    916 	register_t *retval;
    917 {
    918 #if 0
    919 	struct linux_sys_iopl_args /* {
    920 		syscallarg(int) level;
    921 	} */ *uap = v;
    922 #endif
    923 	struct trapframe *fp = p->p_md.md_regs;
    924 
    925 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    926 		return EPERM;
    927 	fp->tf_eflags |= PSL_IOPL;
    928 	*retval = 0;
    929 	return 0;
    930 }
    931 
    932 /*
    933  * See above. If a root process tries to set access to an I/O port,
    934  * just let it have the whole range.
    935  */
    936 int
    937 linux_sys_ioperm(p, v, retval)
    938 	struct proc *p;
    939 	void *v;
    940 	register_t *retval;
    941 {
    942 	struct linux_sys_ioperm_args /* {
    943 		syscallarg(unsigned int) lo;
    944 		syscallarg(unsigned int) hi;
    945 		syscallarg(int) val;
    946 	} */ *uap = v;
    947 	struct trapframe *fp = p->p_md.md_regs;
    948 
    949 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    950 		return EPERM;
    951 	if (SCARG(uap, val))
    952 		fp->tf_eflags |= PSL_IOPL;
    953 	*retval = 0;
    954 	return 0;
    955 }
    956