Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.62.2.7
      1 /*	$NetBSD: linux_machdep.c,v 1.62.2.7 2002/04/01 07:44:10 nathanw Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.62.2.7 2002/04/01 07:44:10 nathanw Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/map.h>
     52 #include <sys/lwp.h>
     53 #include <sys/proc.h>
     54 #include <sys/user.h>
     55 #include <sys/buf.h>
     56 #include <sys/reboot.h>
     57 #include <sys/conf.h>
     58 #include <sys/exec.h>
     59 #include <sys/file.h>
     60 #include <sys/callout.h>
     61 #include <sys/malloc.h>
     62 #include <sys/mbuf.h>
     63 #include <sys/msgbuf.h>
     64 #include <sys/mount.h>
     65 #include <sys/vnode.h>
     66 #include <sys/device.h>
     67 #include <sys/syscallargs.h>
     68 #include <sys/filedesc.h>
     69 #include <sys/exec_elf.h>
     70 #include <sys/disklabel.h>
     71 #include <sys/ioctl.h>
     72 #include <miscfs/specfs/specdev.h>
     73 
     74 #include <compat/linux/common/linux_types.h>
     75 #include <compat/linux/common/linux_signal.h>
     76 #include <compat/linux/common/linux_util.h>
     77 #include <compat/linux/common/linux_ioctl.h>
     78 #include <compat/linux/common/linux_hdio.h>
     79 #include <compat/linux/common/linux_exec.h>
     80 #include <compat/linux/common/linux_machdep.h>
     81 
     82 #include <compat/linux/linux_syscallargs.h>
     83 
     84 #include <machine/cpu.h>
     85 #include <machine/cpufunc.h>
     86 #include <machine/psl.h>
     87 #include <machine/reg.h>
     88 #include <machine/segments.h>
     89 #include <machine/specialreg.h>
     90 #include <machine/sysarch.h>
     91 #include <machine/vm86.h>
     92 #include <machine/vmparam.h>
     93 
     94 /*
     95  * To see whether wscons is configured (for virtual console ioctl calls).
     96  */
     97 #if defined(_KERNEL_OPT)
     98 #include "wsdisplay.h"
     99 #endif
    100 #if (NWSDISPLAY > 0)
    101 #include <dev/wscons/wsconsio.h>
    102 #include <dev/wscons/wsdisplay_usl_io.h>
    103 #if defined(_KERNEL_OPT)
    104 #include "opt_xserver.h"
    105 #endif
    106 #endif
    107 
    108 #ifdef USER_LDT
    109 #include <machine/cpu.h>
    110 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    111     register_t *));
    112 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    113     register_t *));
    114 #endif
    115 
    116 #ifdef DEBUG_LINUX
    117 #define DPRINTF(a) uprintf a
    118 #else
    119 #define DPRINTF(a)
    120 #endif
    121 
    122 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    123 extern struct disklist *i386_alldisks;
    124 extern const char *findblkname __P((int));
    125 
    126 /*
    127  * Deal with some i386-specific things in the Linux emulation code.
    128  */
    129 
    130 void
    131 linux_setregs(l, epp, stack)
    132 	struct lwp *l;
    133 	struct exec_package *epp;
    134 	u_long stack;
    135 {
    136 	struct pcb *pcb = &l->l_addr->u_pcb;
    137 
    138 	setregs(l, epp, stack);
    139 	if (i386_use_fxsave)
    140 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    141 	else
    142 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    143 }
    144 
    145 /*
    146  * Send an interrupt to process.
    147  *
    148  * Stack is set up to allow sigcode stored
    149  * in u. to call routine, followed by kcall
    150  * to sigreturn routine below.  After sigreturn
    151  * resets the signal mask, the stack, and the
    152  * frame pointer, it returns to the user
    153  * specified pc, psl.
    154  */
    155 
    156 void
    157 linux_sendsig(catcher, sig, mask, code)
    158 	sig_t catcher;
    159 	int sig;
    160 	sigset_t *mask;
    161 	u_long code;
    162 {
    163 	struct lwp *l = curproc;
    164 	struct proc *p = l->l_proc;
    165 	struct trapframe *tf;
    166 	struct linux_sigframe *fp, frame;
    167 	int onstack;
    168 
    169 	tf = l->l_md.md_regs;
    170 	/* Do we need to jump onto the signal stack? */
    171 	onstack =
    172 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    173 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    174 
    175 	/* Allocate space for the signal handler context. */
    176 	if (onstack)
    177 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
    178 					  p->p_sigctx.ps_sigstk.ss_size);
    179 	else
    180 		fp = (struct linux_sigframe *)tf->tf_esp;
    181 	fp--;
    182 
    183 	/* Build stack frame for signal trampoline. */
    184 	frame.sf_handler = catcher;
    185 	frame.sf_sig = native_to_linux_sig[sig];
    186 
    187 	/* Save register context. */
    188 #ifdef VM86
    189 	if (tf->tf_eflags & PSL_VM) {
    190 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
    191 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
    192 		frame.sf_sc.sc_es = tf->tf_vm86_es;
    193 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
    194 		frame.sf_sc.sc_eflags = get_vflags(l);
    195 	} else
    196 #endif
    197 	{
    198 		frame.sf_sc.sc_gs = tf->tf_gs;
    199 		frame.sf_sc.sc_fs = tf->tf_fs;
    200 		frame.sf_sc.sc_es = tf->tf_es;
    201 		frame.sf_sc.sc_ds = tf->tf_ds;
    202 		frame.sf_sc.sc_eflags = tf->tf_eflags;
    203 	}
    204 	frame.sf_sc.sc_edi = tf->tf_edi;
    205 	frame.sf_sc.sc_esi = tf->tf_esi;
    206 	frame.sf_sc.sc_ebp = tf->tf_ebp;
    207 	frame.sf_sc.sc_ebx = tf->tf_ebx;
    208 	frame.sf_sc.sc_edx = tf->tf_edx;
    209 	frame.sf_sc.sc_ecx = tf->tf_ecx;
    210 	frame.sf_sc.sc_eax = tf->tf_eax;
    211 	frame.sf_sc.sc_eip = tf->tf_eip;
    212 	frame.sf_sc.sc_cs = tf->tf_cs;
    213 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
    214 	frame.sf_sc.sc_ss = tf->tf_ss;
    215 	frame.sf_sc.sc_err = tf->tf_err;
    216 	frame.sf_sc.sc_trapno = tf->tf_trapno;
    217 	frame.sf_sc.sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    218 
    219 	/* Save signal stack. */
    220 	/* Linux doesn't save the onstack flag in sigframe */
    221 
    222 	/* Save signal mask. */
    223 	native_to_linux_old_sigset(&frame.sf_sc.sc_mask, mask);
    224 
    225 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    226 		/*
    227 		 * Process has trashed its stack; give it an illegal
    228 		 * instruction to halt it in its tracks.
    229 		 */
    230 		sigexit(l, SIGILL);
    231 		/* NOTREACHED */
    232 	}
    233 
    234 	/*
    235 	 * Build context to run handler in.
    236 	 */
    237 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    238 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    239 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    240 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    241 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    242 	tf->tf_esp = (int)fp;
    243 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    244 
    245 	/* Remember that we're now on the signal stack. */
    246 	if (onstack)
    247 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    248 }
    249 
    250 /*
    251  * System call to cleanup state after a signal
    252  * has been taken.  Reset signal mask and
    253  * stack state from context left by sendsig (above).
    254  * Return to previous pc and psl as specified by
    255  * context left by sendsig. Check carefully to
    256  * make sure that the user has not modified the
    257  * psl to gain improper privileges or to cause
    258  * a machine fault.
    259  */
    260 int
    261 linux_sys_rt_sigreturn(l, v, retval)
    262 	struct lwp *l;
    263 	void *v;
    264 	register_t *retval;
    265 {
    266 	/* XXX XAX write me */
    267 	return(ENOSYS);
    268 }
    269 
    270 int
    271 linux_sys_sigreturn(l, v, retval)
    272 	struct lwp *l;
    273 	void *v;
    274 	register_t *retval;
    275 {
    276 	struct linux_sys_sigreturn_args /* {
    277 		syscallarg(struct linux_sigcontext *) scp;
    278 	} */ *uap = v;
    279 	struct proc *p = l->l_proc;
    280 	struct linux_sigcontext *scp, context;
    281 	struct trapframe *tf;
    282 	sigset_t mask;
    283 	ssize_t ss_gap;
    284 
    285 	/*
    286 	 * The trampoline code hands us the context.
    287 	 * It is unsafe to keep track of it ourselves, in the event that a
    288 	 * program jumps out of a signal handler.
    289 	 */
    290 	scp = SCARG(uap, scp);
    291 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
    292 		return (EFAULT);
    293 
    294 	/* Restore register context. */
    295 	tf = l->l_md.md_regs;
    296 #ifdef VM86
    297 	if (context.sc_eflags & PSL_VM) {
    298 		tf->tf_vm86_gs = context.sc_gs;
    299 		tf->tf_vm86_fs = context.sc_fs;
    300 		tf->tf_vm86_es = context.sc_es;
    301 		tf->tf_vm86_ds = context.sc_ds;
    302 		set_vflags(l, context.sc_eflags);
    303 	} else
    304 #endif
    305 	{
    306 		/*
    307 		 * Check for security violations.  If we're returning to
    308 		 * protected mode, the CPU will validate the segment registers
    309 		 * automatically and generate a trap on violations.  We handle
    310 		 * the trap, rather than doing all of the checking here.
    311 		 */
    312 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    313 		    !USERMODE(context.sc_cs, context.sc_eflags))
    314 			return (EINVAL);
    315 
    316 		/* %fs and %gs were restored by the trampoline. */
    317 		tf->tf_es = context.sc_es;
    318 		tf->tf_ds = context.sc_ds;
    319 		tf->tf_eflags = context.sc_eflags;
    320 	}
    321 	tf->tf_edi = context.sc_edi;
    322 	tf->tf_esi = context.sc_esi;
    323 	tf->tf_ebp = context.sc_ebp;
    324 	tf->tf_ebx = context.sc_ebx;
    325 	tf->tf_edx = context.sc_edx;
    326 	tf->tf_ecx = context.sc_ecx;
    327 	tf->tf_eax = context.sc_eax;
    328 	tf->tf_eip = context.sc_eip;
    329 	tf->tf_cs = context.sc_cs;
    330 	tf->tf_esp = context.sc_esp_at_signal;
    331 	tf->tf_ss = context.sc_ss;
    332 
    333 	/* Restore signal stack. */
    334 	/*
    335 	 * Linux really does it this way; it doesn't have space in sigframe
    336 	 * to save the onstack flag.
    337 	 */
    338 	ss_gap = (ssize_t)
    339 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
    340 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
    341 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    342 	else
    343 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
    344 
    345 	/* Restore signal mask. */
    346 	linux_old_to_native_sigset(&mask, &context.sc_mask);
    347 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    348 
    349 	return (EJUSTRETURN);
    350 }
    351 
    352 #ifdef USER_LDT
    353 
    354 int
    355 linux_read_ldt(l, uap, retval)
    356 	struct lwp *l;
    357 	struct linux_sys_modify_ldt_args /* {
    358 		syscallarg(int) func;
    359 		syscallarg(void *) ptr;
    360 		syscallarg(size_t) bytecount;
    361 	} */ *uap;
    362 	register_t *retval;
    363 {
    364 	struct proc *p = l->l_proc;
    365 	struct i386_get_ldt_args gl;
    366 	int error;
    367 	caddr_t sg;
    368 	char *parms;
    369 
    370 	DPRINTF(("linux_read_ldt!"));
    371 	sg = stackgap_init(p, 0);
    372 
    373 	gl.start = 0;
    374 	gl.desc = SCARG(uap, ptr);
    375 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    376 
    377 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    378 
    379 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    380 		return (error);
    381 
    382 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    383 		return (error);
    384 
    385 	*retval *= sizeof(union descriptor);
    386 	return (0);
    387 }
    388 
/*
 * LDT entry description copied in from user space by linux_write_ldt(),
 * which converts it into a native struct segment_descriptor.
 */
struct linux_ldt_info {
	u_int entry_number;		/* LDT slot to modify (must be < 8192) */
	u_long base_addr;		/* segment base, split into sd_lobase/sd_hibase */
	u_int limit;			/* segment limit, split into sd_lolimit/sd_hilimit */
	u_int seg_32bit:1;		/* copied to sd_def32 */
	u_int contents:2;		/* shifted into bits 2-3 of sd_type */
	u_int read_exec_only:1;		/* inverted into bit 1 of sd_type */
	u_int limit_in_pages:1;		/* copied to sd_gran */
	u_int seg_not_present:1;	/* inverted into sd_p */
	u_int useable:1;		/* copied to sd_xx (new mode only) */
};
    400 
    401 int
    402 linux_write_ldt(l, uap, retval)
    403 	struct lwp *l;
    404 	struct linux_sys_modify_ldt_args /* {
    405 		syscallarg(int) func;
    406 		syscallarg(void *) ptr;
    407 		syscallarg(size_t) bytecount;
    408 	} */ *uap;
    409 	register_t *retval;
    410 {
    411 	struct proc *p = l->l_proc;
    412 	struct linux_ldt_info ldt_info;
    413 	struct segment_descriptor sd;
    414 	struct i386_set_ldt_args sl;
    415 	int error;
    416 	caddr_t sg;
    417 	char *parms;
    418 	int oldmode = (int)retval[0];
    419 
    420 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    421 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    422 		return (EINVAL);
    423 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    424 		return error;
    425 	if (ldt_info.entry_number >= 8192)
    426 		return (EINVAL);
    427 	if (ldt_info.contents == 3) {
    428 		if (oldmode)
    429 			return (EINVAL);
    430 		if (ldt_info.seg_not_present)
    431 			return (EINVAL);
    432 	}
    433 
    434 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    435 	    (oldmode || (ldt_info.contents == 0 &&
    436 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    437 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    438 	    ldt_info.useable == 0))) {
    439 		/* this means you should zero the ldt */
    440 		(void)memset(&sd, 0, sizeof(sd));
    441 	} else {
    442 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    443 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    444 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    445 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    446 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    447 		    (!ldt_info.read_exec_only << 1);
    448 		sd.sd_dpl = SEL_UPL;
    449 		sd.sd_p = !ldt_info.seg_not_present;
    450 		sd.sd_def32 = ldt_info.seg_32bit;
    451 		sd.sd_gran = ldt_info.limit_in_pages;
    452 		if (!oldmode)
    453 			sd.sd_xx = ldt_info.useable;
    454 	}
    455 	sg = stackgap_init(p, 0);
    456 	sl.start = ldt_info.entry_number;
    457 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    458 	sl.num = 1;
    459 
    460 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    461 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    462 
    463 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    464 
    465 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    466 		return (error);
    467 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    468 		return (error);
    469 
    470 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    471 		return (error);
    472 
    473 	*retval = 0;
    474 	return (0);
    475 }
    476 
    477 #endif /* USER_LDT */
    478 
    479 int
    480 linux_sys_modify_ldt(l, v, retval)
    481 	struct lwp *l;
    482 	void *v;
    483 	register_t *retval;
    484 {
    485 	struct linux_sys_modify_ldt_args /* {
    486 		syscallarg(int) func;
    487 		syscallarg(void *) ptr;
    488 		syscallarg(size_t) bytecount;
    489 	} */ *uap = v;
    490 
    491 	switch (SCARG(uap, func)) {
    492 #ifdef USER_LDT
    493 	case 0:
    494 		return linux_read_ldt(l, uap, retval);
    495 	case 1:
    496 		retval[0] = 1;
    497 		return linux_write_ldt(l, uap, retval);
    498 	case 2:
    499 #ifdef notyet
    500 		return (linux_read_default_ldt(l, uap, retval);
    501 #else
    502 		return (ENOSYS);
    503 #endif
    504 	case 0x11:
    505 		retval[0] = 0;
    506 		return linux_write_ldt(l, uap, retval);
    507 #endif /* USER_LDT */
    508 
    509 	default:
    510 		return (ENOSYS);
    511 	}
    512 }
    513 
/*
 * XXX Hack to make svgalib work: report an opened wscons VT under the
 * Linux console major number (with the minor shifted up by one) so that
 * svgalib accepts it.  A proper solution would be a mapping table for
 * all major device numbers, applied to linux_mknod as well.
 *
 * When "raw" is zero the device number is returned unchanged.  When it
 * is non-zero, a wscons device is translated as described above and any
 * other device yields 0.
 */
dev_t
linux_fakedev(dev, raw)
	dev_t dev;
	int raw;
{
	if (!raw)
		return dev;

#if (NWSDISPLAY > 0)
	if (major(dev) == NETBSD_WSCONS_MAJOR)
		return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
#endif
	return 0;
}
    535 
    536 #if (NWSDISPLAY > 0)
/*
 * Static keymaps handed out by the LINUX_KDGKBENT ioctl: one table of
 * NR_KEYS 16-bit entries each for the plain, shift, altgr and ctrl
 * states, indexed by the kb_index value of the request.
 *
 * That's not complete, but enough to get an X server running.
 *
 * NOTE(review): the entries look like Linux keymap values (key type in
 * the high byte, value in the low byte) -- confirm against the Linux
 * keyboard headers before editing individual entries.
 */
#define NR_KEYS 128
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};
    610 
/*
 * Keymap selector for the LINUX_KDGKBENT ioctl: the request's kb_table
 * value picks a map here and kb_index picks the entry inside it.
 * altgr_map is listed twice (slots 2 and 3).
 * NOTE(review): slot 3 presumably stands in for a shift+altgr map that
 * does not exist here -- confirm against the Linux keymap ordering.
 */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
    614 #endif
    615 
    616 static struct biosdisk_info *
    617 fd2biosinfo(p, fp)
    618 	struct proc *p;
    619 	struct file *fp;
    620 {
    621 	struct vnode *vp;
    622 	const char *blkname;
    623 	char diskname[16];
    624 	int i;
    625 	struct nativedisk_info *nip;
    626 	struct disklist *dl = i386_alldisks;
    627 
    628 	if (fp->f_type != DTYPE_VNODE)
    629 		return NULL;
    630 	vp = (struct vnode *)fp->f_data;
    631 
    632 	if (vp->v_type != VBLK)
    633 		return NULL;
    634 
    635 	blkname = findblkname(major(vp->v_rdev));
    636 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    637 	    DISKUNIT(vp->v_rdev));
    638 
    639 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    640 		nip = &dl->dl_nativedisks[i];
    641 		if (strcmp(diskname, nip->ni_devname))
    642 			continue;
    643 		if (nip->ni_nmatches != 0)
    644 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    645 	}
    646 
    647 	return NULL;
    648 }
    649 
    650 
    651 /*
    652  * We come here in a last attempt to satisfy a Linux ioctl() call
    653  */
    654 int
    655 linux_machdepioctl(p, v, retval)
    656 	struct proc *p;
    657 	void *v;
    658 	register_t *retval;
    659 {
    660 	struct linux_sys_ioctl_args /* {
    661 		syscallarg(int) fd;
    662 		syscallarg(u_long) com;
    663 		syscallarg(caddr_t) data;
    664 	} */ *uap = v;
    665 	struct sys_ioctl_args bia;
    666 	u_long com;
    667 	int error, error1;
    668 #if (NWSDISPLAY > 0)
    669 	struct vt_mode lvt;
    670 	caddr_t bvtp, sg;
    671 	struct kbentry kbe;
    672 #endif
    673 	struct linux_hd_geometry hdg;
    674 	struct linux_hd_big_geometry hdg_big;
    675 	struct biosdisk_info *bip;
    676 	struct filedesc *fdp;
    677 	struct file *fp;
    678 	int fd;
    679 	struct disklabel label, *labp;
    680 	struct partinfo partp;
    681 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
    682 	u_long start, biostotal, realtotal;
    683 	u_char heads, sectors;
    684 	u_int cylinders;
    685 	struct ioctl_pt pt;
    686 
    687 	fd = SCARG(uap, fd);
    688 	SCARG(&bia, fd) = fd;
    689 	SCARG(&bia, data) = SCARG(uap, data);
    690 	com = SCARG(uap, com);
    691 
    692 	fdp = p->p_fd;
    693 
    694 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    695 		return (EBADF);
    696 
    697 	switch (com) {
    698 #if (NWSDISPLAY > 0)
    699 	case LINUX_KDGKBMODE:
    700 		com = KDGKBMODE;
    701 		break;
    702 	case LINUX_KDSKBMODE:
    703 		com = KDSKBMODE;
    704 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    705 			SCARG(&bia, data) = (caddr_t)K_RAW;
    706 		break;
    707 	case LINUX_KIOCSOUND:
    708 		SCARG(&bia, data) =
    709 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    710 		/* fall through */
    711 	case LINUX_KDMKTONE:
    712 		com = KDMKTONE;
    713 		break;
    714 	case LINUX_KDSETMODE:
    715 		com = KDSETMODE;
    716 		break;
    717 	case LINUX_KDGETMODE:
    718 		/* KD_* values are equal to the wscons numbers */
    719 		com = WSDISPLAYIO_GMODE;
    720 		break;
    721 	case LINUX_KDENABIO:
    722 		com = KDENABIO;
    723 		break;
    724 	case LINUX_KDDISABIO:
    725 		com = KDDISABIO;
    726 		break;
    727 	case LINUX_KDGETLED:
    728 		com = KDGETLED;
    729 		break;
    730 	case LINUX_KDSETLED:
    731 		com = KDSETLED;
    732 		break;
    733 	case LINUX_VT_OPENQRY:
    734 		com = VT_OPENQRY;
    735 		break;
    736 	case LINUX_VT_GETMODE:
    737 		SCARG(&bia, com) = VT_GETMODE;
    738 		/* XXX NJWLWP */
    739 		if ((error = sys_ioctl(curproc, &bia, retval)))
    740 			return error;
    741 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    742 		    sizeof (struct vt_mode))))
    743 			return error;
    744 		lvt.relsig = native_to_linux_sig[lvt.relsig];
    745 		lvt.acqsig = native_to_linux_sig[lvt.acqsig];
    746 		lvt.frsig = native_to_linux_sig[lvt.frsig];
    747 		return copyout((caddr_t)&lvt, SCARG(uap, data),
    748 		    sizeof (struct vt_mode));
    749 	case LINUX_VT_SETMODE:
    750 		com = VT_SETMODE;
    751 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    752 		    sizeof (struct vt_mode))))
    753 			return error;
    754 		lvt.relsig = linux_to_native_sig[lvt.relsig];
    755 		lvt.acqsig = linux_to_native_sig[lvt.acqsig];
    756 		lvt.frsig = linux_to_native_sig[lvt.frsig];
    757 		sg = stackgap_init(p, 0);
    758 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    759 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    760 			return error;
    761 		SCARG(&bia, data) = bvtp;
    762 		break;
    763 	case LINUX_VT_DISALLOCATE:
    764 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    765 		return 0;
    766 	case LINUX_VT_RELDISP:
    767 		com = VT_RELDISP;
    768 		break;
    769 	case LINUX_VT_ACTIVATE:
    770 		com = VT_ACTIVATE;
    771 		break;
    772 	case LINUX_VT_WAITACTIVE:
    773 		com = VT_WAITACTIVE;
    774 		break;
    775 	case LINUX_VT_GETSTATE:
    776 		com = VT_GETSTATE;
    777 		break;
    778 	case LINUX_KDGKBTYPE:
    779 		/* This is what Linux does. */
    780 		return (subyte(SCARG(uap, data), KB_101));
    781 	case LINUX_KDGKBENT:
    782 		/*
    783 		 * The Linux KDGKBENT ioctl is different from the
    784 		 * SYSV original. So we handle it in machdep code.
    785 		 * XXX We should use keyboard mapping information
    786 		 * from wsdisplay, but this would be expensive.
    787 		 */
    788 		if ((error = copyin(SCARG(uap, data), &kbe,
    789 				    sizeof(struct kbentry))))
    790 			return (error);
    791 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    792 		    || kbe.kb_index >= NR_KEYS)
    793 			return (EINVAL);
    794 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    795 		return (copyout(&kbe, SCARG(uap, data),
    796 				sizeof(struct kbentry)));
    797 #endif
    798 	case LINUX_HDIO_GETGEO:
    799 	case LINUX_HDIO_GETGEO_BIG:
    800 		/*
    801 		 * Try to mimic Linux behaviour: return the BIOS geometry
    802 		 * if possible (extending its # of cylinders if it's beyond
    803 		 * the 1023 limit), fall back to the MI geometry (i.e.
    804 		 * the real geometry) if not found, by returning an
    805 		 * error. See common/linux_hdio.c
    806 		 */
    807 		FILE_USE(fp);
    808 		bip = fd2biosinfo(p, fp);
    809 		ioctlf = fp->f_ops->fo_ioctl;
    810 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
    811 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
    812 		FILE_UNUSE(fp, p);
    813 		if (error != 0 && error1 != 0)
    814 			return error1;
    815 		labp = error != 0 ? &label : partp.disklab;
    816 		start = error1 != 0 ? partp.part->p_offset : 0;
    817 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    818 		    && bip->bi_cyl != 0) {
    819 			heads = bip->bi_head;
    820 			sectors = bip->bi_sec;
    821 			cylinders = bip->bi_cyl;
    822 			biostotal = heads * sectors * cylinders;
    823 			realtotal = labp->d_ntracks * labp->d_nsectors *
    824 			    labp->d_ncylinders;
    825 			if (realtotal > biostotal)
    826 				cylinders = realtotal / (heads * sectors);
    827 		} else {
    828 			heads = labp->d_ntracks;
    829 			cylinders = labp->d_ncylinders;
    830 			sectors = labp->d_nsectors;
    831 		}
    832 		if (com == LINUX_HDIO_GETGEO) {
    833 			hdg.start = start;
    834 			hdg.heads = heads;
    835 			hdg.cylinders = cylinders;
    836 			hdg.sectors = sectors;
    837 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
    838 		} else {
    839 			hdg_big.start = start;
    840 			hdg_big.heads = heads;
    841 			hdg_big.cylinders = cylinders;
    842 			hdg_big.sectors = sectors;
    843 			return copyout(&hdg_big, SCARG(uap, data),
    844 			    sizeof hdg_big);
    845 		}
    846 		return 0;
    847 
    848 	default:
    849 		/*
    850 		 * Unknown to us. If it's on a device, just pass it through
    851 		 * using PTIOCLINUX, the device itself might be able to
    852 		 * make some sense of it.
    853 		 * XXX hack: if the function returns EJUSTRETURN,
    854 		 * it has stuffed a sysctl return value in pt.data.
    855 		 */
    856 		FILE_USE(fp);
    857 		ioctlf = fp->f_ops->fo_ioctl;
    858 		pt.com = SCARG(uap, com);
    859 		pt.data = SCARG(uap, data);
    860 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
    861 		FILE_UNUSE(fp, p);
    862 		if (error == EJUSTRETURN) {
    863 			retval[0] = (register_t)pt.data;
    864 			error = 0;
    865 		}
    866 
    867 		if (error == ENOTTY)
    868 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
    869 			    com));
    870 		return error;
    871 	}
    872 	SCARG(&bia, com) = com;
    873 	/* XXX NJWLWP */
    874 	return sys_ioctl(curproc, &bia, retval);
    875 }
    876 
    877 /*
    878  * Set I/O permissions for a process. Just set the maximum level
    879  * right away (ignoring the argument), otherwise we would have
    880  * to rely on I/O permission maps, which are not implemented.
    881  */
    882 int
    883 linux_sys_iopl(l, v, retval)
    884 	struct lwp *l;
    885 	void *v;
    886 	register_t *retval;
    887 {
    888 #if 0
    889 	struct linux_sys_iopl_args /* {
    890 		syscallarg(int) level;
    891 	} */ *uap = v;
    892 #endif
    893 	struct proc *p = l->l_proc;
    894 	struct trapframe *fp = l->l_md.md_regs;
    895 
    896 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    897 		return EPERM;
    898 	fp->tf_eflags |= PSL_IOPL;
    899 	*retval = 0;
    900 	return 0;
    901 }
    902 
    903 /*
    904  * See above. If a root process tries to set access to an I/O port,
    905  * just let it have the whole range.
    906  */
    907 int
    908 linux_sys_ioperm(l, v, retval)
    909 	struct lwp *l;
    910 	void *v;
    911 	register_t *retval;
    912 {
    913 	struct linux_sys_ioperm_args /* {
    914 		syscallarg(unsigned int) lo;
    915 		syscallarg(unsigned int) hi;
    916 		syscallarg(int) val;
    917 	} */ *uap = v;
    918 	struct proc *p = l->l_proc;
    919 	struct trapframe *fp = l->l_md.md_regs;
    920 
    921 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    922 		return EPERM;
    923 	if (SCARG(uap, val))
    924 		fp->tf_eflags |= PSL_IOPL;
    925 	*retval = 0;
    926 	return 0;
    927 }
    928