Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.62.2.14
      1 /*	$NetBSD: linux_machdep.c,v 1.62.2.14 2002/09/17 21:18:58 nathanw Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.62.2.14 2002/09/17 21:18:58 nathanw Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/map.h>
     52 #include <sys/proc.h>
     53 #include <sys/user.h>
     54 #include <sys/buf.h>
     55 #include <sys/reboot.h>
     56 #include <sys/conf.h>
     57 #include <sys/exec.h>
     58 #include <sys/file.h>
     59 #include <sys/callout.h>
     60 #include <sys/malloc.h>
     61 #include <sys/mbuf.h>
     62 #include <sys/msgbuf.h>
     63 #include <sys/mount.h>
     64 #include <sys/vnode.h>
     65 #include <sys/device.h>
     66 #include <sys/sa.h>
     67 #include <sys/syscallargs.h>
     68 #include <sys/filedesc.h>
     69 #include <sys/exec_elf.h>
     70 #include <sys/disklabel.h>
     71 #include <sys/ioctl.h>
     72 #include <miscfs/specfs/specdev.h>
     73 
     74 #include <compat/linux/common/linux_types.h>
     75 #include <compat/linux/common/linux_signal.h>
     76 #include <compat/linux/common/linux_util.h>
     77 #include <compat/linux/common/linux_ioctl.h>
     78 #include <compat/linux/common/linux_hdio.h>
     79 #include <compat/linux/common/linux_exec.h>
     80 #include <compat/linux/common/linux_machdep.h>
     81 
     82 #include <compat/linux/linux_syscallargs.h>
     83 
     84 #include <machine/cpu.h>
     85 #include <machine/cpufunc.h>
     86 #include <machine/psl.h>
     87 #include <machine/reg.h>
     88 #include <machine/segments.h>
     89 #include <machine/specialreg.h>
     90 #include <machine/sysarch.h>
     91 #include <machine/vm86.h>
     92 #include <machine/vmparam.h>
     93 
     94 /*
     95  * To see whether wscons is configured (for virtual console ioctl calls).
     96  */
     97 #if defined(_KERNEL_OPT)
     98 #include "wsdisplay.h"
     99 #endif
    100 #if (NWSDISPLAY > 0)
    101 #include <dev/wscons/wsconsio.h>
    102 #include <dev/wscons/wsdisplay_usl_io.h>
    103 #if defined(_KERNEL_OPT)
    104 #include "opt_xserver.h"
    105 #endif
    106 #endif
    107 
#ifdef USER_LDT
#include <machine/cpu.h>
/*
 * Helpers behind linux_sys_modify_ldt(); both are defined later in this
 * file and only exist when the kernel is built with USER_LDT.
 */
int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    register_t *));
int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    register_t *));
#endif

/*
 * Debug output.  The argument is a complete, parenthesized uprintf()
 * argument list, so call sites use double parentheses:
 *	DPRINTF(("fmt %d\n", value));
 * Without DEBUG_LINUX the macro expands to nothing.
 */
#ifdef DEBUG_LINUX
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif

/* Maps an open file to BIOS disk information; defined later in this file. */
static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
/* Disk list consulted by fd2biosinfo(); defined outside this file. */
extern struct disklist *i386_alldisks;
    124 
    125 /*
    126  * Deal with some i386-specific things in the Linux emulation code.
    127  */
    128 
    129 void
    130 linux_setregs(l, epp, stack)
    131 	struct lwp *l;
    132 	struct exec_package *epp;
    133 	u_long stack;
    134 {
    135 	struct pcb *pcb = &l->l_addr->u_pcb;
    136 	struct trapframe *tf;
    137 
    138 #if NNPX > 0
    139 	/* If we were using the FPU, forget about it. */
    140 	if (npxproc == l)
    141 		npxdrop();
    142 #endif
    143 
    144 #ifdef USER_LDT
    145 	pmap_ldt_cleanup(l);
    146 #endif
    147 
    148 	l->l_md.md_flags &= ~MDP_USEDFPU;
    149 	pcb->pcb_flags = 0;
    150 
    151 	if (i386_use_fxsave) {
    152 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    153 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    154 	} else
    155 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    156 
    157 	tf = l->l_md.md_regs;
    158 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    159 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    160 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    161 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    162 	tf->tf_edi = 0;
    163 	tf->tf_esi = 0;
    164 	tf->tf_ebp = 0;
    165 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    166 	tf->tf_edx = 0;
    167 	tf->tf_ecx = 0;
    168 	tf->tf_eax = 0;
    169 	tf->tf_eip = epp->ep_entry;
    170 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    171 	tf->tf_eflags = PSL_USERSET;
    172 	tf->tf_esp = stack;
    173 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    174 }
    175 
    176 /*
    177  * Send an interrupt to process.
    178  *
    179  * Stack is set up to allow sigcode stored
    180  * in u. to call routine, followed by kcall
    181  * to sigreturn routine below.  After sigreturn
    182  * resets the signal mask, the stack, and the
    183  * frame pointer, it returns to the user
    184  * specified pc, psl.
    185  */
    186 
    187 void
    188 linux_sendsig(sig, mask, code)
    189 	int sig;
    190 	sigset_t *mask;
    191 	u_long code;
    192 {
    193 	struct lwp *l = curlwp;
    194 	struct proc *p = l->l_proc;
    195 	struct trapframe *tf;
    196 	struct linux_sigframe *fp, frame;
    197 	int onstack;
    198 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    199 
    200 	tf = l->l_md.md_regs;
    201 	/* Do we need to jump onto the signal stack? */
    202 	onstack =
    203 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    204 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    205 
    206 	/* Allocate space for the signal handler context. */
    207 	if (onstack)
    208 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
    209 					  p->p_sigctx.ps_sigstk.ss_size);
    210 	else
    211 		fp = (struct linux_sigframe *)tf->tf_esp;
    212 	fp--;
    213 
    214 	/* Build stack frame for signal trampoline. */
    215 	frame.sf_handler = catcher;
    216 	frame.sf_sig = native_to_linux_signo[sig];
    217 
    218 	/* Save register context. */
    219 #ifdef VM86
    220 	if (tf->tf_eflags & PSL_VM) {
    221 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
    222 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
    223 		frame.sf_sc.sc_es = tf->tf_vm86_es;
    224 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
    225 		frame.sf_sc.sc_eflags = get_vflags(l);
    226 	} else
    227 #endif
    228 	{
    229 		frame.sf_sc.sc_gs = tf->tf_gs;
    230 		frame.sf_sc.sc_fs = tf->tf_fs;
    231 		frame.sf_sc.sc_es = tf->tf_es;
    232 		frame.sf_sc.sc_ds = tf->tf_ds;
    233 		frame.sf_sc.sc_eflags = tf->tf_eflags;
    234 	}
    235 	frame.sf_sc.sc_edi = tf->tf_edi;
    236 	frame.sf_sc.sc_esi = tf->tf_esi;
    237 	frame.sf_sc.sc_ebp = tf->tf_ebp;
    238 	frame.sf_sc.sc_ebx = tf->tf_ebx;
    239 	frame.sf_sc.sc_edx = tf->tf_edx;
    240 	frame.sf_sc.sc_ecx = tf->tf_ecx;
    241 	frame.sf_sc.sc_eax = tf->tf_eax;
    242 	frame.sf_sc.sc_eip = tf->tf_eip;
    243 	frame.sf_sc.sc_cs = tf->tf_cs;
    244 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
    245 	frame.sf_sc.sc_ss = tf->tf_ss;
    246 	frame.sf_sc.sc_err = tf->tf_err;
    247 	frame.sf_sc.sc_trapno = tf->tf_trapno;
    248 	frame.sf_sc.sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    249 
    250 	/* Save signal stack. */
    251 	/* Linux doesn't save the onstack flag in sigframe */
    252 
    253 	/* Save signal mask. */
    254 	native_to_linux_old_sigset(&frame.sf_sc.sc_mask, mask);
    255 
    256 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    257 		/*
    258 		 * Process has trashed its stack; give it an illegal
    259 		 * instruction to halt it in its tracks.
    260 		 */
    261 		sigexit(l, SIGILL);
    262 		/* NOTREACHED */
    263 	}
    264 
    265 	/*
    266 	 * Build context to run handler in.
    267 	 */
    268 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    269 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    270 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    271 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    272 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    273 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    274 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    275 	tf->tf_esp = (int)fp;
    276 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    277 
    278 	/* Remember that we're now on the signal stack. */
    279 	if (onstack)
    280 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    281 }
    282 
    283 /*
    284  * System call to cleanup state after a signal
    285  * has been taken.  Reset signal mask and
    286  * stack state from context left by sendsig (above).
    287  * Return to previous pc and psl as specified by
    288  * context left by sendsig. Check carefully to
    289  * make sure that the user has not modified the
    290  * psl to gain improper privileges or to cause
    291  * a machine fault.
    292  */
    293 int
    294 linux_sys_rt_sigreturn(l, v, retval)
    295 	struct lwp *l;
    296 	void *v;
    297 	register_t *retval;
    298 {
    299 	/* XXX XAX write me */
    300 	return(ENOSYS);
    301 }
    302 
    303 int
    304 linux_sys_sigreturn(l, v, retval)
    305 	struct lwp *l;
    306 	void *v;
    307 	register_t *retval;
    308 {
    309 	struct linux_sys_sigreturn_args /* {
    310 		syscallarg(struct linux_sigcontext *) scp;
    311 	} */ *uap = v;
    312 	struct proc *p = l->l_proc;
    313 	struct linux_sigcontext *scp, context;
    314 	struct trapframe *tf;
    315 	sigset_t mask;
    316 	ssize_t ss_gap;
    317 
    318 	/*
    319 	 * The trampoline code hands us the context.
    320 	 * It is unsafe to keep track of it ourselves, in the event that a
    321 	 * program jumps out of a signal handler.
    322 	 */
    323 	scp = SCARG(uap, scp);
    324 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
    325 		return (EFAULT);
    326 
    327 	/* Restore register context. */
    328 	tf = l->l_md.md_regs;
    329 #ifdef VM86
    330 	if (context.sc_eflags & PSL_VM) {
    331 		tf->tf_vm86_gs = context.sc_gs;
    332 		tf->tf_vm86_fs = context.sc_fs;
    333 		tf->tf_vm86_es = context.sc_es;
    334 		tf->tf_vm86_ds = context.sc_ds;
    335 		set_vflags(l, context.sc_eflags);
    336 	} else
    337 #endif
    338 	{
    339 		/*
    340 		 * Check for security violations.  If we're returning to
    341 		 * protected mode, the CPU will validate the segment registers
    342 		 * automatically and generate a trap on violations.  We handle
    343 		 * the trap, rather than doing all of the checking here.
    344 		 */
    345 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    346 		    !USERMODE(context.sc_cs, context.sc_eflags))
    347 			return (EINVAL);
    348 
    349 		tf->tf_gs = context.sc_gs;
    350 		tf->tf_fs = context.sc_fs;
    351 		tf->tf_es = context.sc_es;
    352 		tf->tf_ds = context.sc_ds;
    353 		tf->tf_eflags = context.sc_eflags;
    354 	}
    355 	tf->tf_edi = context.sc_edi;
    356 	tf->tf_esi = context.sc_esi;
    357 	tf->tf_ebp = context.sc_ebp;
    358 	tf->tf_ebx = context.sc_ebx;
    359 	tf->tf_edx = context.sc_edx;
    360 	tf->tf_ecx = context.sc_ecx;
    361 	tf->tf_eax = context.sc_eax;
    362 	tf->tf_eip = context.sc_eip;
    363 	tf->tf_cs = context.sc_cs;
    364 	tf->tf_esp = context.sc_esp_at_signal;
    365 	tf->tf_ss = context.sc_ss;
    366 
    367 	/* Restore signal stack. */
    368 	/*
    369 	 * Linux really does it this way; it doesn't have space in sigframe
    370 	 * to save the onstack flag.
    371 	 */
    372 	ss_gap = (ssize_t)
    373 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
    374 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
    375 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    376 	else
    377 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
    378 
    379 	/* Restore signal mask. */
    380 	linux_old_to_native_sigset(&mask, &context.sc_mask);
    381 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    382 
    383 	return (EJUSTRETURN);
    384 }
    385 
    386 #ifdef USER_LDT
    387 
    388 int
    389 linux_read_ldt(l, uap, retval)
    390 	struct lwp *l;
    391 	struct linux_sys_modify_ldt_args /* {
    392 		syscallarg(int) func;
    393 		syscallarg(void *) ptr;
    394 		syscallarg(size_t) bytecount;
    395 	} */ *uap;
    396 	register_t *retval;
    397 {
    398 	struct proc *p = l->l_proc;
    399 	struct i386_get_ldt_args gl;
    400 	int error;
    401 	caddr_t sg;
    402 	char *parms;
    403 
    404 	DPRINTF(("linux_read_ldt!"));
    405 	sg = stackgap_init(p, 0);
    406 
    407 	gl.start = 0;
    408 	gl.desc = SCARG(uap, ptr);
    409 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    410 
    411 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    412 
    413 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    414 		return (error);
    415 
    416 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    417 		return (error);
    418 
    419 	*retval *= sizeof(union descriptor);
    420 	return (0);
    421 }
    422 
/*
 * Descriptor description copied in from user space by linux_write_ldt().
 * The per-field notes say how linux_write_ldt() uses each member.
 * NOTE(review): the layout presumably mirrors the structure Linux passes
 * to modify_ldt(2) -- confirm against the Linux headers.
 */
struct linux_ldt_info {
	u_int entry_number;		/* LDT slot; must be below 8192 */
	u_long base_addr;		/* split into sd_lobase / sd_hibase */
	u_int limit;			/* split into sd_lolimit / sd_hilimit */
	u_int seg_32bit:1;		/* copied to sd_def32 */
	u_int contents:2;		/* shifted into bits 2-3 of sd_type */
	u_int read_exec_only:1;		/* inverted into bit 1 of sd_type */
	u_int limit_in_pages:1;		/* copied to sd_gran */
	u_int seg_not_present:1;	/* inverted into sd_p */
	u_int useable:1;		/* copied to sd_xx unless in old mode */
};
    434 
    435 int
    436 linux_write_ldt(l, uap, retval)
    437 	struct lwp *l;
    438 	struct linux_sys_modify_ldt_args /* {
    439 		syscallarg(int) func;
    440 		syscallarg(void *) ptr;
    441 		syscallarg(size_t) bytecount;
    442 	} */ *uap;
    443 	register_t *retval;
    444 {
    445 	struct proc *p = l->l_proc;
    446 	struct linux_ldt_info ldt_info;
    447 	struct segment_descriptor sd;
    448 	struct i386_set_ldt_args sl;
    449 	int error;
    450 	caddr_t sg;
    451 	char *parms;
    452 	int oldmode = (int)retval[0];
    453 
    454 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    455 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    456 		return (EINVAL);
    457 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    458 		return error;
    459 	if (ldt_info.entry_number >= 8192)
    460 		return (EINVAL);
    461 	if (ldt_info.contents == 3) {
    462 		if (oldmode)
    463 			return (EINVAL);
    464 		if (ldt_info.seg_not_present)
    465 			return (EINVAL);
    466 	}
    467 
    468 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    469 	    (oldmode || (ldt_info.contents == 0 &&
    470 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    471 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    472 	    ldt_info.useable == 0))) {
    473 		/* this means you should zero the ldt */
    474 		(void)memset(&sd, 0, sizeof(sd));
    475 	} else {
    476 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    477 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    478 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    479 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    480 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    481 		    (!ldt_info.read_exec_only << 1);
    482 		sd.sd_dpl = SEL_UPL;
    483 		sd.sd_p = !ldt_info.seg_not_present;
    484 		sd.sd_def32 = ldt_info.seg_32bit;
    485 		sd.sd_gran = ldt_info.limit_in_pages;
    486 		if (!oldmode)
    487 			sd.sd_xx = ldt_info.useable;
    488 		else
    489 			sd.sd_xx = 0;
    490 	}
    491 	sg = stackgap_init(p, 0);
    492 	sl.start = ldt_info.entry_number;
    493 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    494 	sl.num = 1;
    495 
    496 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    497 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    498 
    499 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    500 
    501 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    502 		return (error);
    503 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    504 		return (error);
    505 
    506 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    507 		return (error);
    508 
    509 	*retval = 0;
    510 	return (0);
    511 }
    512 
    513 #endif /* USER_LDT */
    514 
    515 int
    516 linux_sys_modify_ldt(l, v, retval)
    517 	struct lwp *l;
    518 	void *v;
    519 	register_t *retval;
    520 {
    521 	struct linux_sys_modify_ldt_args /* {
    522 		syscallarg(int) func;
    523 		syscallarg(void *) ptr;
    524 		syscallarg(size_t) bytecount;
    525 	} */ *uap = v;
    526 
    527 	switch (SCARG(uap, func)) {
    528 #ifdef USER_LDT
    529 	case 0:
    530 		return linux_read_ldt(l, uap, retval);
    531 	case 1:
    532 		retval[0] = 1;
    533 		return linux_write_ldt(l, uap, retval);
    534 	case 2:
    535 #ifdef notyet
    536 		return (linux_read_default_ldt(l, uap, retval);
    537 #else
    538 		return (ENOSYS);
    539 #endif
    540 	case 0x11:
    541 		retval[0] = 0;
    542 		return linux_write_ldt(l, uap, retval);
    543 #endif /* USER_LDT */
    544 
    545 	default:
    546 		return (ENOSYS);
    547 	}
    548 }
    549 
/*
 * XXX Hack to keep svgalib happy: report an opened wscons virtual
 * terminal under the Linux console major number.
 *
 *	dev	native device number
 *	raw	non-zero if dev refers to a character device and is
 *		therefore eligible for translation
 *
 * Returns makedev(LINUX_CONS_MAJOR, minor(dev) + 1) when wsdisplay is
 * configured, raw is set and dev belongs to the wsdisplay driver;
 * otherwise dev is returned unchanged.
 *
 * The proper fix would be a mapping table covering all major device
 * numbers, applied to linux_mknod as well.
 */
dev_t
linux_fakedev(dev, raw)
	dev_t dev;
	int raw;
{
	if (!raw)
		return dev;

#if (NWSDISPLAY > 0)
	{
		extern const struct cdevsw wsdisplay_cdevsw;

		if (cdevsw_lookup(dev) == &wsdisplay_cdevsw)
			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
	}
#endif

	return dev;
}
    571 
#if (NWSDISPLAY > 0)
/*
 * That's not complete, but enough to get an X server running.
 *
 * Fixed keyboard translation tables served by the LINUX_KDGKBENT ioctl
 * in linux_machdepioctl(): the handler returns
 * linux_keytabs[kb_table][kb_index].  Each table has NR_KEYS entries.
 * NOTE(review): the values presumably use the Linux keymap encoding --
 * confirm against the Linux default keymap.
 */
#define NR_KEYS 128
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/*
 * Table selector for LINUX_KDGKBENT, indexed by kb_table.  Note that
 * altgr_map is listed at both index 2 and index 3.
 */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
#endif
    651 
    652 static struct biosdisk_info *
    653 fd2biosinfo(p, fp)
    654 	struct proc *p;
    655 	struct file *fp;
    656 {
    657 	struct vnode *vp;
    658 	const char *blkname;
    659 	char diskname[16];
    660 	int i;
    661 	struct nativedisk_info *nip;
    662 	struct disklist *dl = i386_alldisks;
    663 
    664 	if (fp->f_type != DTYPE_VNODE)
    665 		return NULL;
    666 	vp = (struct vnode *)fp->f_data;
    667 
    668 	if (vp->v_type != VBLK)
    669 		return NULL;
    670 
    671 	blkname = devsw_blk2name(major(vp->v_rdev));
    672 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    673 	    DISKUNIT(vp->v_rdev));
    674 
    675 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    676 		nip = &dl->dl_nativedisks[i];
    677 		if (strcmp(diskname, nip->ni_devname))
    678 			continue;
    679 		if (nip->ni_nmatches != 0)
    680 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    681 	}
    682 
    683 	return NULL;
    684 }
    685 
    686 
    687 /*
    688  * We come here in a last attempt to satisfy a Linux ioctl() call
    689  */
    690 int
    691 linux_machdepioctl(p, v, retval)
    692 	struct proc *p;
    693 	void *v;
    694 	register_t *retval;
    695 {
    696 	struct linux_sys_ioctl_args /* {
    697 		syscallarg(int) fd;
    698 		syscallarg(u_long) com;
    699 		syscallarg(caddr_t) data;
    700 	} */ *uap = v;
    701 	struct sys_ioctl_args bia;
    702 	u_long com;
    703 	int error, error1;
    704 #if (NWSDISPLAY > 0)
    705 	struct vt_mode lvt;
    706 	caddr_t bvtp, sg;
    707 	struct kbentry kbe;
    708 #endif
    709 	struct linux_hd_geometry hdg;
    710 	struct linux_hd_big_geometry hdg_big;
    711 	struct biosdisk_info *bip;
    712 	struct filedesc *fdp;
    713 	struct file *fp;
    714 	int fd;
    715 	struct disklabel label, *labp;
    716 	struct partinfo partp;
    717 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
    718 	u_long start, biostotal, realtotal;
    719 	u_char heads, sectors;
    720 	u_int cylinders;
    721 	struct ioctl_pt pt;
    722 
    723 	fd = SCARG(uap, fd);
    724 	SCARG(&bia, fd) = fd;
    725 	SCARG(&bia, data) = SCARG(uap, data);
    726 	com = SCARG(uap, com);
    727 
    728 	fdp = p->p_fd;
    729 
    730 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    731 		return (EBADF);
    732 
    733 	switch (com) {
    734 #if (NWSDISPLAY > 0)
    735 	case LINUX_KDGKBMODE:
    736 		com = KDGKBMODE;
    737 		break;
    738 	case LINUX_KDSKBMODE:
    739 		com = KDSKBMODE;
    740 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    741 			SCARG(&bia, data) = (caddr_t)K_RAW;
    742 		break;
    743 	case LINUX_KIOCSOUND:
    744 		SCARG(&bia, data) =
    745 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    746 		/* fall through */
    747 	case LINUX_KDMKTONE:
    748 		com = KDMKTONE;
    749 		break;
    750 	case LINUX_KDSETMODE:
    751 		com = KDSETMODE;
    752 		break;
    753 	case LINUX_KDGETMODE:
    754 		/* KD_* values are equal to the wscons numbers */
    755 		com = WSDISPLAYIO_GMODE;
    756 		break;
    757 	case LINUX_KDENABIO:
    758 		com = KDENABIO;
    759 		break;
    760 	case LINUX_KDDISABIO:
    761 		com = KDDISABIO;
    762 		break;
    763 	case LINUX_KDGETLED:
    764 		com = KDGETLED;
    765 		break;
    766 	case LINUX_KDSETLED:
    767 		com = KDSETLED;
    768 		break;
    769 	case LINUX_VT_OPENQRY:
    770 		com = VT_OPENQRY;
    771 		break;
    772 	case LINUX_VT_GETMODE:
    773 		SCARG(&bia, com) = VT_GETMODE;
    774 		/* XXX NJWLWP */
    775 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    776 			return error;
    777 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    778 		    sizeof (struct vt_mode))))
    779 			return error;
    780 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    781 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    782 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    783 		return copyout((caddr_t)&lvt, SCARG(uap, data),
    784 		    sizeof (struct vt_mode));
    785 	case LINUX_VT_SETMODE:
    786 		com = VT_SETMODE;
    787 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    788 		    sizeof (struct vt_mode))))
    789 			return error;
    790 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    791 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    792 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    793 		sg = stackgap_init(p, 0);
    794 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    795 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    796 			return error;
    797 		SCARG(&bia, data) = bvtp;
    798 		break;
    799 	case LINUX_VT_DISALLOCATE:
    800 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    801 		return 0;
    802 	case LINUX_VT_RELDISP:
    803 		com = VT_RELDISP;
    804 		break;
    805 	case LINUX_VT_ACTIVATE:
    806 		com = VT_ACTIVATE;
    807 		break;
    808 	case LINUX_VT_WAITACTIVE:
    809 		com = VT_WAITACTIVE;
    810 		break;
    811 	case LINUX_VT_GETSTATE:
    812 		com = VT_GETSTATE;
    813 		break;
    814 	case LINUX_KDGKBTYPE:
    815 		/* This is what Linux does. */
    816 		return (subyte(SCARG(uap, data), KB_101));
    817 	case LINUX_KDGKBENT:
    818 		/*
    819 		 * The Linux KDGKBENT ioctl is different from the
    820 		 * SYSV original. So we handle it in machdep code.
    821 		 * XXX We should use keyboard mapping information
    822 		 * from wsdisplay, but this would be expensive.
    823 		 */
    824 		if ((error = copyin(SCARG(uap, data), &kbe,
    825 				    sizeof(struct kbentry))))
    826 			return (error);
    827 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    828 		    || kbe.kb_index >= NR_KEYS)
    829 			return (EINVAL);
    830 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    831 		return (copyout(&kbe, SCARG(uap, data),
    832 				sizeof(struct kbentry)));
    833 #endif
    834 	case LINUX_HDIO_GETGEO:
    835 	case LINUX_HDIO_GETGEO_BIG:
    836 		/*
    837 		 * Try to mimic Linux behaviour: return the BIOS geometry
    838 		 * if possible (extending its # of cylinders if it's beyond
    839 		 * the 1023 limit), fall back to the MI geometry (i.e.
    840 		 * the real geometry) if not found, by returning an
    841 		 * error. See common/linux_hdio.c
    842 		 */
    843 		FILE_USE(fp);
    844 		bip = fd2biosinfo(p, fp);
    845 		ioctlf = fp->f_ops->fo_ioctl;
    846 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
    847 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
    848 		FILE_UNUSE(fp, p);
    849 		if (error != 0 && error1 != 0)
    850 			return error1;
    851 		labp = error != 0 ? &label : partp.disklab;
    852 		start = error1 != 0 ? partp.part->p_offset : 0;
    853 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    854 		    && bip->bi_cyl != 0) {
    855 			heads = bip->bi_head;
    856 			sectors = bip->bi_sec;
    857 			cylinders = bip->bi_cyl;
    858 			biostotal = heads * sectors * cylinders;
    859 			realtotal = labp->d_ntracks * labp->d_nsectors *
    860 			    labp->d_ncylinders;
    861 			if (realtotal > biostotal)
    862 				cylinders = realtotal / (heads * sectors);
    863 		} else {
    864 			heads = labp->d_ntracks;
    865 			cylinders = labp->d_ncylinders;
    866 			sectors = labp->d_nsectors;
    867 		}
    868 		if (com == LINUX_HDIO_GETGEO) {
    869 			hdg.start = start;
    870 			hdg.heads = heads;
    871 			hdg.cylinders = cylinders;
    872 			hdg.sectors = sectors;
    873 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
    874 		} else {
    875 			hdg_big.start = start;
    876 			hdg_big.heads = heads;
    877 			hdg_big.cylinders = cylinders;
    878 			hdg_big.sectors = sectors;
    879 			return copyout(&hdg_big, SCARG(uap, data),
    880 			    sizeof hdg_big);
    881 		}
    882 
    883 	default:
    884 		/*
    885 		 * Unknown to us. If it's on a device, just pass it through
    886 		 * using PTIOCLINUX, the device itself might be able to
    887 		 * make some sense of it.
    888 		 * XXX hack: if the function returns EJUSTRETURN,
    889 		 * it has stuffed a sysctl return value in pt.data.
    890 		 */
    891 		FILE_USE(fp);
    892 		ioctlf = fp->f_ops->fo_ioctl;
    893 		pt.com = SCARG(uap, com);
    894 		pt.data = SCARG(uap, data);
    895 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
    896 		FILE_UNUSE(fp, p);
    897 		if (error == EJUSTRETURN) {
    898 			retval[0] = (register_t)pt.data;
    899 			error = 0;
    900 		}
    901 
    902 		if (error == ENOTTY)
    903 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
    904 			    com));
    905 		return error;
    906 	}
    907 	SCARG(&bia, com) = com;
    908 	/* XXX NJWLWP */
    909 	return sys_ioctl(curlwp, &bia, retval);
    910 }
    911 
    912 /*
    913  * Set I/O permissions for a process. Just set the maximum level
    914  * right away (ignoring the argument), otherwise we would have
    915  * to rely on I/O permission maps, which are not implemented.
    916  */
    917 int
    918 linux_sys_iopl(l, v, retval)
    919 	struct lwp *l;
    920 	void *v;
    921 	register_t *retval;
    922 {
    923 #if 0
    924 	struct linux_sys_iopl_args /* {
    925 		syscallarg(int) level;
    926 	} */ *uap = v;
    927 #endif
    928 	struct proc *p = l->l_proc;
    929 	struct trapframe *fp = l->l_md.md_regs;
    930 
    931 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    932 		return EPERM;
    933 	fp->tf_eflags |= PSL_IOPL;
    934 	*retval = 0;
    935 	return 0;
    936 }
    937 
    938 /*
    939  * See above. If a root process tries to set access to an I/O port,
    940  * just let it have the whole range.
    941  */
    942 int
    943 linux_sys_ioperm(l, v, retval)
    944 	struct lwp *l;
    945 	void *v;
    946 	register_t *retval;
    947 {
    948 	struct linux_sys_ioperm_args /* {
    949 		syscallarg(unsigned int) lo;
    950 		syscallarg(unsigned int) hi;
    951 		syscallarg(int) val;
    952 	} */ *uap = v;
    953 	struct proc *p = l->l_proc;
    954 	struct trapframe *fp = l->l_md.md_regs;
    955 
    956 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    957 		return EPERM;
    958 	if (SCARG(uap, val))
    959 		fp->tf_eflags |= PSL_IOPL;
    960 	*retval = 0;
    961 	return 0;
    962 }
    963