Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.62.2.15
      1 /*	$NetBSD: linux_machdep.c,v 1.62.2.15 2002/10/18 02:41:09 nathanw Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.62.2.15 2002/10/18 02:41:09 nathanw Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/sa.h>
     66 #include <sys/syscallargs.h>
     67 #include <sys/filedesc.h>
     68 #include <sys/exec_elf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/ioctl.h>
     71 #include <miscfs/specfs/specdev.h>
     72 
     73 #include <compat/linux/common/linux_types.h>
     74 #include <compat/linux/common/linux_signal.h>
     75 #include <compat/linux/common/linux_util.h>
     76 #include <compat/linux/common/linux_ioctl.h>
     77 #include <compat/linux/common/linux_hdio.h>
     78 #include <compat/linux/common/linux_exec.h>
     79 #include <compat/linux/common/linux_machdep.h>
     80 
     81 #include <compat/linux/linux_syscallargs.h>
     82 
     83 #include <machine/cpu.h>
     84 #include <machine/cpufunc.h>
     85 #include <machine/psl.h>
     86 #include <machine/reg.h>
     87 #include <machine/segments.h>
     88 #include <machine/specialreg.h>
     89 #include <machine/sysarch.h>
     90 #include <machine/vm86.h>
     91 #include <machine/vmparam.h>
     92 
     93 /*
     94  * To see whether wscons is configured (for virtual console ioctl calls).
     95  */
     96 #if defined(_KERNEL_OPT)
     97 #include "wsdisplay.h"
     98 #endif
     99 #if (NWSDISPLAY > 0)
    100 #include <dev/wscons/wsconsio.h>
    101 #include <dev/wscons/wsdisplay_usl_io.h>
    102 #if defined(_KERNEL_OPT)
    103 #include "opt_xserver.h"
    104 #endif
    105 #endif
    106 
    107 #ifdef USER_LDT
    108 #include <machine/cpu.h>
    109 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    110     register_t *));
    111 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    112     register_t *));
    113 #endif
    114 
    115 #ifdef DEBUG_LINUX
    116 #define DPRINTF(a) uprintf a
    117 #else
    118 #define DPRINTF(a)
    119 #endif
    120 
    121 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    122 extern struct disklist *i386_alldisks;
    123 
    124 /*
    125  * Deal with some i386-specific things in the Linux emulation code.
    126  */
    127 
    128 void
    129 linux_setregs(l, epp, stack)
    130 	struct lwp *l;
    131 	struct exec_package *epp;
    132 	u_long stack;
    133 {
    134 	struct pcb *pcb = &l->l_addr->u_pcb;
    135 	struct trapframe *tf;
    136 
    137 #if NNPX > 0
    138 	/* If we were using the FPU, forget about it. */
    139 	if (npxproc == l)
    140 		npxdrop();
    141 #endif
    142 
    143 #ifdef USER_LDT
    144 	pmap_ldt_cleanup(l);
    145 #endif
    146 
    147 	l->l_md.md_flags &= ~MDP_USEDFPU;
    148 
    149 	if (i386_use_fxsave) {
    150 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    151 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    152 	} else
    153 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    154 
    155 	tf = l->l_md.md_regs;
    156 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    157 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    158 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    159 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    160 	tf->tf_edi = 0;
    161 	tf->tf_esi = 0;
    162 	tf->tf_ebp = 0;
    163 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    164 	tf->tf_edx = 0;
    165 	tf->tf_ecx = 0;
    166 	tf->tf_eax = 0;
    167 	tf->tf_eip = epp->ep_entry;
    168 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    169 	tf->tf_eflags = PSL_USERSET;
    170 	tf->tf_esp = stack;
    171 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    172 }
    173 
    174 /*
    175  * Send an interrupt to process.
    176  *
    177  * Stack is set up to allow sigcode stored
    178  * in u. to call routine, followed by kcall
    179  * to sigreturn routine below.  After sigreturn
    180  * resets the signal mask, the stack, and the
    181  * frame pointer, it returns to the user
    182  * specified pc, psl.
    183  */
    184 
    185 void
    186 linux_sendsig(sig, mask, code)
    187 	int sig;
    188 	sigset_t *mask;
    189 	u_long code;
    190 {
    191 	struct lwp *l = curlwp;
    192 	struct proc *p = l->l_proc;
    193 	struct trapframe *tf;
    194 	struct linux_sigframe *fp, frame;
    195 	int onstack;
    196 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    197 
    198 	tf = l->l_md.md_regs;
    199 	/* Do we need to jump onto the signal stack? */
    200 	onstack =
    201 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    202 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    203 
    204 	/* Allocate space for the signal handler context. */
    205 	if (onstack)
    206 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
    207 					  p->p_sigctx.ps_sigstk.ss_size);
    208 	else
    209 		fp = (struct linux_sigframe *)tf->tf_esp;
    210 	fp--;
    211 
    212 	/* Build stack frame for signal trampoline. */
    213 	frame.sf_handler = catcher;
    214 	frame.sf_sig = native_to_linux_signo[sig];
    215 
    216 	/* Save register context. */
    217 #ifdef VM86
    218 	if (tf->tf_eflags & PSL_VM) {
    219 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
    220 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
    221 		frame.sf_sc.sc_es = tf->tf_vm86_es;
    222 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
    223 		frame.sf_sc.sc_eflags = get_vflags(l);
    224 	} else
    225 #endif
    226 	{
    227 		frame.sf_sc.sc_gs = tf->tf_gs;
    228 		frame.sf_sc.sc_fs = tf->tf_fs;
    229 		frame.sf_sc.sc_es = tf->tf_es;
    230 		frame.sf_sc.sc_ds = tf->tf_ds;
    231 		frame.sf_sc.sc_eflags = tf->tf_eflags;
    232 	}
    233 	frame.sf_sc.sc_edi = tf->tf_edi;
    234 	frame.sf_sc.sc_esi = tf->tf_esi;
    235 	frame.sf_sc.sc_ebp = tf->tf_ebp;
    236 	frame.sf_sc.sc_ebx = tf->tf_ebx;
    237 	frame.sf_sc.sc_edx = tf->tf_edx;
    238 	frame.sf_sc.sc_ecx = tf->tf_ecx;
    239 	frame.sf_sc.sc_eax = tf->tf_eax;
    240 	frame.sf_sc.sc_eip = tf->tf_eip;
    241 	frame.sf_sc.sc_cs = tf->tf_cs;
    242 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
    243 	frame.sf_sc.sc_ss = tf->tf_ss;
    244 	frame.sf_sc.sc_err = tf->tf_err;
    245 	frame.sf_sc.sc_trapno = tf->tf_trapno;
    246 	frame.sf_sc.sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    247 
    248 	/* Save signal stack. */
    249 	/* Linux doesn't save the onstack flag in sigframe */
    250 
    251 	/* Save signal mask. */
    252 	native_to_linux_old_sigset(&frame.sf_sc.sc_mask, mask);
    253 
    254 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    255 		/*
    256 		 * Process has trashed its stack; give it an illegal
    257 		 * instruction to halt it in its tracks.
    258 		 */
    259 		sigexit(l, SIGILL);
    260 		/* NOTREACHED */
    261 	}
    262 
    263 	/*
    264 	 * Build context to run handler in.
    265 	 */
    266 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    267 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    268 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    269 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    270 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    271 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    272 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    273 	tf->tf_esp = (int)fp;
    274 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    275 
    276 	/* Remember that we're now on the signal stack. */
    277 	if (onstack)
    278 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    279 }
    280 
    281 /*
    282  * System call to cleanup state after a signal
    283  * has been taken.  Reset signal mask and
    284  * stack state from context left by sendsig (above).
    285  * Return to previous pc and psl as specified by
    286  * context left by sendsig. Check carefully to
    287  * make sure that the user has not modified the
    288  * psl to gain improper privileges or to cause
    289  * a machine fault.
    290  */
    291 int
    292 linux_sys_rt_sigreturn(l, v, retval)
    293 	struct lwp *l;
    294 	void *v;
    295 	register_t *retval;
    296 {
    297 	/* XXX XAX write me */
    298 	return(ENOSYS);
    299 }
    300 
    301 int
    302 linux_sys_sigreturn(l, v, retval)
    303 	struct lwp *l;
    304 	void *v;
    305 	register_t *retval;
    306 {
    307 	struct linux_sys_sigreturn_args /* {
    308 		syscallarg(struct linux_sigcontext *) scp;
    309 	} */ *uap = v;
    310 	struct proc *p = l->l_proc;
    311 	struct linux_sigcontext *scp, context;
    312 	struct trapframe *tf;
    313 	sigset_t mask;
    314 	ssize_t ss_gap;
    315 
    316 	/*
    317 	 * The trampoline code hands us the context.
    318 	 * It is unsafe to keep track of it ourselves, in the event that a
    319 	 * program jumps out of a signal handler.
    320 	 */
    321 	scp = SCARG(uap, scp);
    322 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
    323 		return (EFAULT);
    324 
    325 	/* Restore register context. */
    326 	tf = l->l_md.md_regs;
    327 #ifdef VM86
    328 	if (context.sc_eflags & PSL_VM) {
    329 		tf->tf_vm86_gs = context.sc_gs;
    330 		tf->tf_vm86_fs = context.sc_fs;
    331 		tf->tf_vm86_es = context.sc_es;
    332 		tf->tf_vm86_ds = context.sc_ds;
    333 		set_vflags(l, context.sc_eflags);
    334 	} else
    335 #endif
    336 	{
    337 		/*
    338 		 * Check for security violations.  If we're returning to
    339 		 * protected mode, the CPU will validate the segment registers
    340 		 * automatically and generate a trap on violations.  We handle
    341 		 * the trap, rather than doing all of the checking here.
    342 		 */
    343 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    344 		    !USERMODE(context.sc_cs, context.sc_eflags))
    345 			return (EINVAL);
    346 
    347 		tf->tf_gs = context.sc_gs;
    348 		tf->tf_fs = context.sc_fs;
    349 		tf->tf_es = context.sc_es;
    350 		tf->tf_ds = context.sc_ds;
    351 		tf->tf_eflags = context.sc_eflags;
    352 	}
    353 	tf->tf_edi = context.sc_edi;
    354 	tf->tf_esi = context.sc_esi;
    355 	tf->tf_ebp = context.sc_ebp;
    356 	tf->tf_ebx = context.sc_ebx;
    357 	tf->tf_edx = context.sc_edx;
    358 	tf->tf_ecx = context.sc_ecx;
    359 	tf->tf_eax = context.sc_eax;
    360 	tf->tf_eip = context.sc_eip;
    361 	tf->tf_cs = context.sc_cs;
    362 	tf->tf_esp = context.sc_esp_at_signal;
    363 	tf->tf_ss = context.sc_ss;
    364 
    365 	/* Restore signal stack. */
    366 	/*
    367 	 * Linux really does it this way; it doesn't have space in sigframe
    368 	 * to save the onstack flag.
    369 	 */
    370 	ss_gap = (ssize_t)
    371 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
    372 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
    373 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    374 	else
    375 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
    376 
    377 	/* Restore signal mask. */
    378 	linux_old_to_native_sigset(&mask, &context.sc_mask);
    379 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    380 
    381 	return (EJUSTRETURN);
    382 }
    383 
    384 #ifdef USER_LDT
    385 
    386 int
    387 linux_read_ldt(l, uap, retval)
    388 	struct lwp *l;
    389 	struct linux_sys_modify_ldt_args /* {
    390 		syscallarg(int) func;
    391 		syscallarg(void *) ptr;
    392 		syscallarg(size_t) bytecount;
    393 	} */ *uap;
    394 	register_t *retval;
    395 {
    396 	struct proc *p = l->l_proc;
    397 	struct i386_get_ldt_args gl;
    398 	int error;
    399 	caddr_t sg;
    400 	char *parms;
    401 
    402 	DPRINTF(("linux_read_ldt!"));
    403 	sg = stackgap_init(p, 0);
    404 
    405 	gl.start = 0;
    406 	gl.desc = SCARG(uap, ptr);
    407 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    408 
    409 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    410 
    411 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    412 		return (error);
    413 
    414 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    415 		return (error);
    416 
    417 	*retval *= sizeof(union descriptor);
    418 	return (0);
    419 }
    420 
    421 struct linux_ldt_info {
    422 	u_int entry_number;
    423 	u_long base_addr;
    424 	u_int limit;
    425 	u_int seg_32bit:1;
    426 	u_int contents:2;
    427 	u_int read_exec_only:1;
    428 	u_int limit_in_pages:1;
    429 	u_int seg_not_present:1;
    430 	u_int useable:1;
    431 };
    432 
    433 int
    434 linux_write_ldt(l, uap, retval)
    435 	struct lwp *l;
    436 	struct linux_sys_modify_ldt_args /* {
    437 		syscallarg(int) func;
    438 		syscallarg(void *) ptr;
    439 		syscallarg(size_t) bytecount;
    440 	} */ *uap;
    441 	register_t *retval;
    442 {
    443 	struct proc *p = l->l_proc;
    444 	struct linux_ldt_info ldt_info;
    445 	struct segment_descriptor sd;
    446 	struct i386_set_ldt_args sl;
    447 	int error;
    448 	caddr_t sg;
    449 	char *parms;
    450 	int oldmode = (int)retval[0];
    451 
    452 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    453 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    454 		return (EINVAL);
    455 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    456 		return error;
    457 	if (ldt_info.entry_number >= 8192)
    458 		return (EINVAL);
    459 	if (ldt_info.contents == 3) {
    460 		if (oldmode)
    461 			return (EINVAL);
    462 		if (ldt_info.seg_not_present)
    463 			return (EINVAL);
    464 	}
    465 
    466 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    467 	    (oldmode || (ldt_info.contents == 0 &&
    468 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    469 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    470 	    ldt_info.useable == 0))) {
    471 		/* this means you should zero the ldt */
    472 		(void)memset(&sd, 0, sizeof(sd));
    473 	} else {
    474 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    475 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    476 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    477 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    478 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    479 		    (!ldt_info.read_exec_only << 1);
    480 		sd.sd_dpl = SEL_UPL;
    481 		sd.sd_p = !ldt_info.seg_not_present;
    482 		sd.sd_def32 = ldt_info.seg_32bit;
    483 		sd.sd_gran = ldt_info.limit_in_pages;
    484 		if (!oldmode)
    485 			sd.sd_xx = ldt_info.useable;
    486 		else
    487 			sd.sd_xx = 0;
    488 	}
    489 	sg = stackgap_init(p, 0);
    490 	sl.start = ldt_info.entry_number;
    491 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    492 	sl.num = 1;
    493 
    494 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    495 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    496 
    497 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    498 
    499 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    500 		return (error);
    501 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    502 		return (error);
    503 
    504 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    505 		return (error);
    506 
    507 	*retval = 0;
    508 	return (0);
    509 }
    510 
    511 #endif /* USER_LDT */
    512 
    513 int
    514 linux_sys_modify_ldt(l, v, retval)
    515 	struct lwp *l;
    516 	void *v;
    517 	register_t *retval;
    518 {
    519 	struct linux_sys_modify_ldt_args /* {
    520 		syscallarg(int) func;
    521 		syscallarg(void *) ptr;
    522 		syscallarg(size_t) bytecount;
    523 	} */ *uap = v;
    524 
    525 	switch (SCARG(uap, func)) {
    526 #ifdef USER_LDT
    527 	case 0:
    528 		return linux_read_ldt(l, uap, retval);
    529 	case 1:
    530 		retval[0] = 1;
    531 		return linux_write_ldt(l, uap, retval);
    532 	case 2:
    533 #ifdef notyet
    534 		return (linux_read_default_ldt(l, uap, retval);
    535 #else
    536 		return (ENOSYS);
    537 #endif
    538 	case 0x11:
    539 		retval[0] = 0;
    540 		return linux_write_ldt(l, uap, retval);
    541 #endif /* USER_LDT */
    542 
    543 	default:
    544 		return (ENOSYS);
    545 	}
    546 }
    547 
    548 /*
    549  * XXX Pathetic hack to make svgalib work. This will fake the major
    550  * device number of an opened VT so that svgalib likes it. grmbl.
    551  * Should probably do it 'wrong the right way' and use a mapping
    552  * array for all major device numbers, and map linux_mknod too.
    553  */
    554 dev_t
    555 linux_fakedev(dev, raw)
    556 	dev_t dev;
    557 	int raw;
    558 {
    559 	if (raw) {
    560 #if (NWSDISPLAY > 0)
    561 		extern const struct cdevsw wsdisplay_cdevsw;
    562 		if (cdevsw_lookup(dev) == &wsdisplay_cdevsw)
    563 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    564 #endif
    565 	}
    566 
    567 	return dev;
    568 }
    569 
    570 #if (NWSDISPLAY > 0)
/*
 * Fixed keyboard translation tables returned by the LINUX_KDGKBENT
 * ioctl.  That's not complete, but enough to get an X server running.
 *
 * Each table has NR_KEYS entries; linux_keytabs[] selects a table by the
 * kb_table value of the request.
 */
#define NR_KEYS 128

/* Table 0: no modifier. */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Table 1: shift. */
static const u_short shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Tables 2 and 3: AltGr (linux_keytabs[] lists this table twice). */
static const u_short altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Table 4: control. */
static const u_short ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Indexed by kb_table; the bounds check in the ioctl uses its size. */
const u_short * const linux_keytabs[] = {
	plain_map,
	shift_map,
	altgr_map,
	altgr_map,
	ctrl_map
};
    648 #endif
    649 
    650 static struct biosdisk_info *
    651 fd2biosinfo(p, fp)
    652 	struct proc *p;
    653 	struct file *fp;
    654 {
    655 	struct vnode *vp;
    656 	const char *blkname;
    657 	char diskname[16];
    658 	int i;
    659 	struct nativedisk_info *nip;
    660 	struct disklist *dl = i386_alldisks;
    661 
    662 	if (fp->f_type != DTYPE_VNODE)
    663 		return NULL;
    664 	vp = (struct vnode *)fp->f_data;
    665 
    666 	if (vp->v_type != VBLK)
    667 		return NULL;
    668 
    669 	blkname = devsw_blk2name(major(vp->v_rdev));
    670 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    671 	    DISKUNIT(vp->v_rdev));
    672 
    673 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    674 		nip = &dl->dl_nativedisks[i];
    675 		if (strcmp(diskname, nip->ni_devname))
    676 			continue;
    677 		if (nip->ni_nmatches != 0)
    678 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    679 	}
    680 
    681 	return NULL;
    682 }
    683 
    684 
    685 /*
    686  * We come here in a last attempt to satisfy a Linux ioctl() call
    687  */
    688 int
    689 linux_machdepioctl(p, v, retval)
    690 	struct proc *p;
    691 	void *v;
    692 	register_t *retval;
    693 {
    694 	struct linux_sys_ioctl_args /* {
    695 		syscallarg(int) fd;
    696 		syscallarg(u_long) com;
    697 		syscallarg(caddr_t) data;
    698 	} */ *uap = v;
    699 	struct sys_ioctl_args bia;
    700 	u_long com;
    701 	int error, error1;
    702 #if (NWSDISPLAY > 0)
    703 	struct vt_mode lvt;
    704 	caddr_t bvtp, sg;
    705 	struct kbentry kbe;
    706 #endif
    707 	struct linux_hd_geometry hdg;
    708 	struct linux_hd_big_geometry hdg_big;
    709 	struct biosdisk_info *bip;
    710 	struct filedesc *fdp;
    711 	struct file *fp;
    712 	int fd;
    713 	struct disklabel label, *labp;
    714 	struct partinfo partp;
    715 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
    716 	u_long start, biostotal, realtotal;
    717 	u_char heads, sectors;
    718 	u_int cylinders;
    719 	struct ioctl_pt pt;
    720 
    721 	fd = SCARG(uap, fd);
    722 	SCARG(&bia, fd) = fd;
    723 	SCARG(&bia, data) = SCARG(uap, data);
    724 	com = SCARG(uap, com);
    725 
    726 	fdp = p->p_fd;
    727 
    728 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    729 		return (EBADF);
    730 
    731 	switch (com) {
    732 #if (NWSDISPLAY > 0)
    733 	case LINUX_KDGKBMODE:
    734 		com = KDGKBMODE;
    735 		break;
    736 	case LINUX_KDSKBMODE:
    737 		com = KDSKBMODE;
    738 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    739 			SCARG(&bia, data) = (caddr_t)K_RAW;
    740 		break;
    741 	case LINUX_KIOCSOUND:
    742 		SCARG(&bia, data) =
    743 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    744 		/* fall through */
    745 	case LINUX_KDMKTONE:
    746 		com = KDMKTONE;
    747 		break;
    748 	case LINUX_KDSETMODE:
    749 		com = KDSETMODE;
    750 		break;
    751 	case LINUX_KDGETMODE:
    752 		/* KD_* values are equal to the wscons numbers */
    753 		com = WSDISPLAYIO_GMODE;
    754 		break;
    755 	case LINUX_KDENABIO:
    756 		com = KDENABIO;
    757 		break;
    758 	case LINUX_KDDISABIO:
    759 		com = KDDISABIO;
    760 		break;
    761 	case LINUX_KDGETLED:
    762 		com = KDGETLED;
    763 		break;
    764 	case LINUX_KDSETLED:
    765 		com = KDSETLED;
    766 		break;
    767 	case LINUX_VT_OPENQRY:
    768 		com = VT_OPENQRY;
    769 		break;
    770 	case LINUX_VT_GETMODE:
    771 		SCARG(&bia, com) = VT_GETMODE;
    772 		/* XXX NJWLWP */
    773 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    774 			return error;
    775 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    776 		    sizeof (struct vt_mode))))
    777 			return error;
    778 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    779 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    780 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    781 		return copyout((caddr_t)&lvt, SCARG(uap, data),
    782 		    sizeof (struct vt_mode));
    783 	case LINUX_VT_SETMODE:
    784 		com = VT_SETMODE;
    785 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    786 		    sizeof (struct vt_mode))))
    787 			return error;
    788 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    789 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    790 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    791 		sg = stackgap_init(p, 0);
    792 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    793 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    794 			return error;
    795 		SCARG(&bia, data) = bvtp;
    796 		break;
    797 	case LINUX_VT_DISALLOCATE:
    798 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    799 		return 0;
    800 	case LINUX_VT_RELDISP:
    801 		com = VT_RELDISP;
    802 		break;
    803 	case LINUX_VT_ACTIVATE:
    804 		com = VT_ACTIVATE;
    805 		break;
    806 	case LINUX_VT_WAITACTIVE:
    807 		com = VT_WAITACTIVE;
    808 		break;
    809 	case LINUX_VT_GETSTATE:
    810 		com = VT_GETSTATE;
    811 		break;
    812 	case LINUX_KDGKBTYPE:
    813 		/* This is what Linux does. */
    814 		return (subyte(SCARG(uap, data), KB_101));
    815 	case LINUX_KDGKBENT:
    816 		/*
    817 		 * The Linux KDGKBENT ioctl is different from the
    818 		 * SYSV original. So we handle it in machdep code.
    819 		 * XXX We should use keyboard mapping information
    820 		 * from wsdisplay, but this would be expensive.
    821 		 */
    822 		if ((error = copyin(SCARG(uap, data), &kbe,
    823 				    sizeof(struct kbentry))))
    824 			return (error);
    825 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    826 		    || kbe.kb_index >= NR_KEYS)
    827 			return (EINVAL);
    828 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    829 		return (copyout(&kbe, SCARG(uap, data),
    830 				sizeof(struct kbentry)));
    831 #endif
    832 	case LINUX_HDIO_GETGEO:
    833 	case LINUX_HDIO_GETGEO_BIG:
    834 		/*
    835 		 * Try to mimic Linux behaviour: return the BIOS geometry
    836 		 * if possible (extending its # of cylinders if it's beyond
    837 		 * the 1023 limit), fall back to the MI geometry (i.e.
    838 		 * the real geometry) if not found, by returning an
    839 		 * error. See common/linux_hdio.c
    840 		 */
    841 		FILE_USE(fp);
    842 		bip = fd2biosinfo(p, fp);
    843 		ioctlf = fp->f_ops->fo_ioctl;
    844 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
    845 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
    846 		FILE_UNUSE(fp, p);
    847 		if (error != 0 && error1 != 0)
    848 			return error1;
    849 		labp = error != 0 ? &label : partp.disklab;
    850 		start = error1 != 0 ? partp.part->p_offset : 0;
    851 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    852 		    && bip->bi_cyl != 0) {
    853 			heads = bip->bi_head;
    854 			sectors = bip->bi_sec;
    855 			cylinders = bip->bi_cyl;
    856 			biostotal = heads * sectors * cylinders;
    857 			realtotal = labp->d_ntracks * labp->d_nsectors *
    858 			    labp->d_ncylinders;
    859 			if (realtotal > biostotal)
    860 				cylinders = realtotal / (heads * sectors);
    861 		} else {
    862 			heads = labp->d_ntracks;
    863 			cylinders = labp->d_ncylinders;
    864 			sectors = labp->d_nsectors;
    865 		}
    866 		if (com == LINUX_HDIO_GETGEO) {
    867 			hdg.start = start;
    868 			hdg.heads = heads;
    869 			hdg.cylinders = cylinders;
    870 			hdg.sectors = sectors;
    871 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
    872 		} else {
    873 			hdg_big.start = start;
    874 			hdg_big.heads = heads;
    875 			hdg_big.cylinders = cylinders;
    876 			hdg_big.sectors = sectors;
    877 			return copyout(&hdg_big, SCARG(uap, data),
    878 			    sizeof hdg_big);
    879 		}
    880 
    881 	default:
    882 		/*
    883 		 * Unknown to us. If it's on a device, just pass it through
    884 		 * using PTIOCLINUX, the device itself might be able to
    885 		 * make some sense of it.
    886 		 * XXX hack: if the function returns EJUSTRETURN,
    887 		 * it has stuffed a sysctl return value in pt.data.
    888 		 */
    889 		FILE_USE(fp);
    890 		ioctlf = fp->f_ops->fo_ioctl;
    891 		pt.com = SCARG(uap, com);
    892 		pt.data = SCARG(uap, data);
    893 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
    894 		FILE_UNUSE(fp, p);
    895 		if (error == EJUSTRETURN) {
    896 			retval[0] = (register_t)pt.data;
    897 			error = 0;
    898 		}
    899 
    900 		if (error == ENOTTY)
    901 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
    902 			    com));
    903 		return error;
    904 	}
    905 	SCARG(&bia, com) = com;
    906 	/* XXX NJWLWP */
    907 	return sys_ioctl(curlwp, &bia, retval);
    908 }
    909 
    910 /*
    911  * Set I/O permissions for a process. Just set the maximum level
    912  * right away (ignoring the argument), otherwise we would have
    913  * to rely on I/O permission maps, which are not implemented.
    914  */
    915 int
    916 linux_sys_iopl(l, v, retval)
    917 	struct lwp *l;
    918 	void *v;
    919 	register_t *retval;
    920 {
    921 #if 0
    922 	struct linux_sys_iopl_args /* {
    923 		syscallarg(int) level;
    924 	} */ *uap = v;
    925 #endif
    926 	struct proc *p = l->l_proc;
    927 	struct trapframe *fp = l->l_md.md_regs;
    928 
    929 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    930 		return EPERM;
    931 	fp->tf_eflags |= PSL_IOPL;
    932 	*retval = 0;
    933 	return 0;
    934 }
    935 
    936 /*
    937  * See above. If a root process tries to set access to an I/O port,
    938  * just let it have the whole range.
    939  */
    940 int
    941 linux_sys_ioperm(l, v, retval)
    942 	struct lwp *l;
    943 	void *v;
    944 	register_t *retval;
    945 {
    946 	struct linux_sys_ioperm_args /* {
    947 		syscallarg(unsigned int) lo;
    948 		syscallarg(unsigned int) hi;
    949 		syscallarg(int) val;
    950 	} */ *uap = v;
    951 	struct proc *p = l->l_proc;
    952 	struct trapframe *fp = l->l_md.md_regs;
    953 
    954 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    955 		return EPERM;
    956 	if (SCARG(uap, val))
    957 		fp->tf_eflags |= PSL_IOPL;
    958 	*retval = 0;
    959 	return 0;
    960 }
    961