/* linux_machdep.c -- revision 1.72 (i386) */
      1 /*	$NetBSD: linux_machdep.c,v 1.72 2002/03/22 18:39:23 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.72 2002/03/22 18:39:23 christos Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/map.h>
     52 #include <sys/proc.h>
     53 #include <sys/user.h>
     54 #include <sys/buf.h>
     55 #include <sys/reboot.h>
     56 #include <sys/conf.h>
     57 #include <sys/exec.h>
     58 #include <sys/file.h>
     59 #include <sys/callout.h>
     60 #include <sys/malloc.h>
     61 #include <sys/mbuf.h>
     62 #include <sys/msgbuf.h>
     63 #include <sys/mount.h>
     64 #include <sys/vnode.h>
     65 #include <sys/device.h>
     66 #include <sys/syscallargs.h>
     67 #include <sys/filedesc.h>
     68 #include <sys/exec_elf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/ioctl.h>
     71 #include <miscfs/specfs/specdev.h>
     72 
     73 #include <compat/linux/common/linux_types.h>
     74 #include <compat/linux/common/linux_signal.h>
     75 #include <compat/linux/common/linux_util.h>
     76 #include <compat/linux/common/linux_ioctl.h>
     77 #include <compat/linux/common/linux_hdio.h>
     78 #include <compat/linux/common/linux_exec.h>
     79 #include <compat/linux/common/linux_machdep.h>
     80 
     81 #include <compat/linux/linux_syscallargs.h>
     82 
     83 #include <machine/cpu.h>
     84 #include <machine/cpufunc.h>
     85 #include <machine/psl.h>
     86 #include <machine/reg.h>
     87 #include <machine/segments.h>
     88 #include <machine/specialreg.h>
     89 #include <machine/sysarch.h>
     90 #include <machine/vm86.h>
     91 #include <machine/vmparam.h>
     92 
     93 /*
     94  * To see whether wscons is configured (for virtual console ioctl calls).
     95  */
     96 #if defined(_KERNEL_OPT)
     97 #include "wsdisplay.h"
     98 #endif
     99 #if (NWSDISPLAY > 0)
    100 #include <dev/wscons/wsconsio.h>
    101 #include <dev/wscons/wsdisplay_usl_io.h>
    102 #if defined(_KERNEL_OPT)
    103 #include "opt_xserver.h"
    104 #endif
    105 #endif
    106 
    107 #ifdef USER_LDT
    108 #include <machine/cpu.h>
    109 int linux_read_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
    110     register_t *));
    111 int linux_write_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
    112     register_t *));
    113 #endif
    114 
    115 #ifdef DEBUG_LINUX
    116 #define DPRINTF(a) uprintf a
    117 #else
    118 #define DPRINTF(a)
    119 #endif
    120 
    121 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    122 extern struct disklist *i386_alldisks;
    123 extern const char *findblkname __P((int));
    124 
    125 /*
    126  * Deal with some i386-specific things in the Linux emulation code.
    127  */
    128 
    129 void
    130 linux_setregs(p, epp, stack)
    131 	struct proc *p;
    132 	struct exec_package *epp;
    133 	u_long stack;
    134 {
    135 	struct pcb *pcb = &p->p_addr->u_pcb;
    136 
    137 	setregs(p, epp, stack);
    138 	if (i386_use_fxsave)
    139 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    140 	else
    141 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    142 }
    143 
    144 /*
    145  * Send an interrupt to process.
    146  *
    147  * Stack is set up to allow sigcode stored
    148  * in u. to call routine, followed by kcall
    149  * to sigreturn routine below.  After sigreturn
    150  * resets the signal mask, the stack, and the
    151  * frame pointer, it returns to the user
    152  * specified pc, psl.
    153  */
    154 
    155 void
    156 linux_sendsig(catcher, sig, mask, code)
    157 	sig_t catcher;
    158 	int sig;
    159 	sigset_t *mask;
    160 	u_long code;
    161 {
    162 	struct proc *p = curproc;
    163 	struct trapframe *tf;
    164 	struct linux_sigframe *fp, frame;
    165 	int onstack;
    166 
    167 	tf = p->p_md.md_regs;
    168 
    169 	/* Do we need to jump onto the signal stack? */
    170 	onstack =
    171 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    172 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    173 
    174 	/* Allocate space for the signal handler context. */
    175 	if (onstack)
    176 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
    177 					  p->p_sigctx.ps_sigstk.ss_size);
    178 	else
    179 		fp = (struct linux_sigframe *)tf->tf_esp;
    180 	fp--;
    181 
    182 	/* Build stack frame for signal trampoline. */
    183 	frame.sf_handler = catcher;
    184 	frame.sf_sig = native_to_linux_sig[sig];
    185 
    186 	/* Save register context. */
    187 #ifdef VM86
    188 	if (tf->tf_eflags & PSL_VM) {
    189 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
    190 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
    191 		frame.sf_sc.sc_es = tf->tf_vm86_es;
    192 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
    193 		frame.sf_sc.sc_eflags = get_vflags(p);
    194 	} else
    195 #endif
    196 	{
    197 		frame.sf_sc.sc_gs = tf->tf_gs;
    198 		frame.sf_sc.sc_fs = tf->tf_fs;
    199 		frame.sf_sc.sc_es = tf->tf_es;
    200 		frame.sf_sc.sc_ds = tf->tf_ds;
    201 		frame.sf_sc.sc_eflags = tf->tf_eflags;
    202 	}
    203 	frame.sf_sc.sc_edi = tf->tf_edi;
    204 	frame.sf_sc.sc_esi = tf->tf_esi;
    205 	frame.sf_sc.sc_ebp = tf->tf_ebp;
    206 	frame.sf_sc.sc_ebx = tf->tf_ebx;
    207 	frame.sf_sc.sc_edx = tf->tf_edx;
    208 	frame.sf_sc.sc_ecx = tf->tf_ecx;
    209 	frame.sf_sc.sc_eax = tf->tf_eax;
    210 	frame.sf_sc.sc_eip = tf->tf_eip;
    211 	frame.sf_sc.sc_cs = tf->tf_cs;
    212 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
    213 	frame.sf_sc.sc_ss = tf->tf_ss;
    214 	frame.sf_sc.sc_err = tf->tf_err;
    215 	frame.sf_sc.sc_trapno = tf->tf_trapno;
    216 	frame.sf_sc.sc_cr2 = p->p_addr->u_pcb.pcb_cr2;
    217 
    218 	/* Save signal stack. */
    219 	/* Linux doesn't save the onstack flag in sigframe */
    220 
    221 	/* Save signal mask. */
    222 	native_to_linux_old_sigset(&frame.sf_sc.sc_mask, mask);
    223 
    224 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    225 		/*
    226 		 * Process has trashed its stack; give it an illegal
    227 		 * instruction to halt it in its tracks.
    228 		 */
    229 		sigexit(p, SIGILL);
    230 		/* NOTREACHED */
    231 	}
    232 
    233 	/*
    234 	 * Build context to run handler in.
    235 	 */
    236 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    237 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    238 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    239 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    240 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    241 	tf->tf_esp = (int)fp;
    242 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    243 
    244 	/* Remember that we're now on the signal stack. */
    245 	if (onstack)
    246 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    247 }
    248 
    249 /*
    250  * System call to cleanup state after a signal
    251  * has been taken.  Reset signal mask and
    252  * stack state from context left by sendsig (above).
    253  * Return to previous pc and psl as specified by
    254  * context left by sendsig. Check carefully to
    255  * make sure that the user has not modified the
    256  * psl to gain improper privileges or to cause
    257  * a machine fault.
    258  */
int
linux_sys_rt_sigreturn(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	/*
	 * XXX XAX write me: not implemented yet.  None of the arguments
	 * are examined; the call always fails with ENOSYS.
	 */
	const int error = ENOSYS;

	return (error);
}
    268 
    269 int
    270 linux_sys_sigreturn(p, v, retval)
    271 	struct proc *p;
    272 	void *v;
    273 	register_t *retval;
    274 {
    275 	struct linux_sys_sigreturn_args /* {
    276 		syscallarg(struct linux_sigcontext *) scp;
    277 	} */ *uap = v;
    278 	struct linux_sigcontext *scp, context;
    279 	struct trapframe *tf;
    280 	sigset_t mask;
    281 	ssize_t ss_gap;
    282 
    283 	/*
    284 	 * The trampoline code hands us the context.
    285 	 * It is unsafe to keep track of it ourselves, in the event that a
    286 	 * program jumps out of a signal handler.
    287 	 */
    288 	scp = SCARG(uap, scp);
    289 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
    290 		return (EFAULT);
    291 
    292 	/* Restore register context. */
    293 	tf = p->p_md.md_regs;
    294 #ifdef VM86
    295 	if (context.sc_eflags & PSL_VM) {
    296 		tf->tf_vm86_gs = context.sc_gs;
    297 		tf->tf_vm86_fs = context.sc_fs;
    298 		tf->tf_vm86_es = context.sc_es;
    299 		tf->tf_vm86_ds = context.sc_ds;
    300 		set_vflags(p, context.sc_eflags);
    301 	} else
    302 #endif
    303 	{
    304 		/*
    305 		 * Check for security violations.  If we're returning to
    306 		 * protected mode, the CPU will validate the segment registers
    307 		 * automatically and generate a trap on violations.  We handle
    308 		 * the trap, rather than doing all of the checking here.
    309 		 */
    310 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    311 		    !USERMODE(context.sc_cs, context.sc_eflags))
    312 			return (EINVAL);
    313 
    314 		/* %fs and %gs were restored by the trampoline. */
    315 		tf->tf_es = context.sc_es;
    316 		tf->tf_ds = context.sc_ds;
    317 		tf->tf_eflags = context.sc_eflags;
    318 	}
    319 	tf->tf_edi = context.sc_edi;
    320 	tf->tf_esi = context.sc_esi;
    321 	tf->tf_ebp = context.sc_ebp;
    322 	tf->tf_ebx = context.sc_ebx;
    323 	tf->tf_edx = context.sc_edx;
    324 	tf->tf_ecx = context.sc_ecx;
    325 	tf->tf_eax = context.sc_eax;
    326 	tf->tf_eip = context.sc_eip;
    327 	tf->tf_cs = context.sc_cs;
    328 	tf->tf_esp = context.sc_esp_at_signal;
    329 	tf->tf_ss = context.sc_ss;
    330 
    331 	/* Restore signal stack. */
    332 	/*
    333 	 * Linux really does it this way; it doesn't have space in sigframe
    334 	 * to save the onstack flag.
    335 	 */
    336 	ss_gap = (ssize_t)
    337 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
    338 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
    339 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    340 	else
    341 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
    342 
    343 	/* Restore signal mask. */
    344 	linux_old_to_native_sigset(&mask, &context.sc_mask);
    345 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    346 
    347 	return (EJUSTRETURN);
    348 }
    349 
    350 #ifdef USER_LDT
    351 
    352 int
    353 linux_read_ldt(p, uap, retval)
    354 	struct proc *p;
    355 	struct linux_sys_modify_ldt_args /* {
    356 		syscallarg(int) func;
    357 		syscallarg(void *) ptr;
    358 		syscallarg(size_t) bytecount;
    359 	} */ *uap;
    360 	register_t *retval;
    361 {
    362 	struct i386_get_ldt_args gl;
    363 	int error;
    364 	caddr_t sg;
    365 	char *parms;
    366 
    367 	DPRINTF(("linux_read_ldt!"));
    368 	sg = stackgap_init(p, 0);
    369 
    370 	gl.start = 0;
    371 	gl.desc = SCARG(uap, ptr);
    372 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    373 
    374 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    375 
    376 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    377 		return (error);
    378 
    379 	if ((error = i386_get_ldt(p, parms, retval)) != 0)
    380 		return (error);
    381 
    382 	*retval *= sizeof(union descriptor);
    383 	return (0);
    384 }
    385 
    386 struct linux_ldt_info {
    387 	u_int entry_number;
    388 	u_long base_addr;
    389 	u_int limit;
    390 	u_int seg_32bit:1;
    391 	u_int contents:2;
    392 	u_int read_exec_only:1;
    393 	u_int limit_in_pages:1;
    394 	u_int seg_not_present:1;
    395 	u_int useable:1;
    396 };
    397 
    398 int
    399 linux_write_ldt(p, uap, retval)
    400 	struct proc *p;
    401 	struct linux_sys_modify_ldt_args /* {
    402 		syscallarg(int) func;
    403 		syscallarg(void *) ptr;
    404 		syscallarg(size_t) bytecount;
    405 	} */ *uap;
    406 	register_t *retval;
    407 {
    408 	struct linux_ldt_info ldt_info;
    409 	struct segment_descriptor sd;
    410 	struct i386_set_ldt_args sl;
    411 	int error;
    412 	caddr_t sg;
    413 	char *parms;
    414 	int oldmode = (int)retval[0];
    415 
    416 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    417 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    418 		return (EINVAL);
    419 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    420 		return error;
    421 	if (ldt_info.entry_number >= 8192)
    422 		return (EINVAL);
    423 	if (ldt_info.contents == 3) {
    424 		if (oldmode)
    425 			return (EINVAL);
    426 		if (ldt_info.seg_not_present)
    427 			return (EINVAL);
    428 	}
    429 
    430 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    431 	    (oldmode || (ldt_info.contents == 0 &&
    432 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    433 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    434 	    ldt_info.useable == 0))) {
    435 		/* this means you should zero the ldt */
    436 		(void)memset(&sd, 0, sizeof(sd));
    437 	} else {
    438 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    439 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    440 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    441 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    442 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    443 		    (!ldt_info.read_exec_only << 1);
    444 		sd.sd_dpl = SEL_UPL;
    445 		sd.sd_p = !ldt_info.seg_not_present;
    446 		sd.sd_def32 = ldt_info.seg_32bit;
    447 		sd.sd_gran = ldt_info.limit_in_pages;
    448 		if (!oldmode)
    449 			sd.sd_xx = ldt_info.useable;
    450 	}
    451 	sg = stackgap_init(p, 0);
    452 	sl.start = ldt_info.entry_number;
    453 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    454 	sl.num = 1;
    455 
    456 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    457 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    458 
    459 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    460 
    461 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    462 		return (error);
    463 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    464 		return (error);
    465 
    466 	if ((error = i386_set_ldt(p, parms, retval)) != 0)
    467 		return (error);
    468 
    469 	*retval = 0;
    470 	return (0);
    471 }
    472 
    473 #endif /* USER_LDT */
    474 
    475 int
    476 linux_sys_modify_ldt(p, v, retval)
    477 	struct proc *p;
    478 	void *v;
    479 	register_t *retval;
    480 {
    481 	struct linux_sys_modify_ldt_args /* {
    482 		syscallarg(int) func;
    483 		syscallarg(void *) ptr;
    484 		syscallarg(size_t) bytecount;
    485 	} */ *uap = v;
    486 
    487 	switch (SCARG(uap, func)) {
    488 #ifdef USER_LDT
    489 	case 0:
    490 		return linux_read_ldt(p, uap, retval);
    491 	case 1:
    492 		retval[0] = 1;
    493 		return linux_write_ldt(p, uap, retval);
    494 	case 2:
    495 #ifdef notyet
    496 		return (linux_read_default_ldt(p, uap, retval);
    497 #else
    498 		return (ENOSYS);
    499 #endif
    500 	case 0x11:
    501 		retval[0] = 0;
    502 		return linux_write_ldt(p, uap, retval);
    503 #endif /* USER_LDT */
    504 
    505 	default:
    506 		return (ENOSYS);
    507 	}
    508 }
    509 
/*
 * XXX Pathetic hack to make svgalib work. This will fake the major
 * device number of an opened VT so that svgalib likes it. grmbl.
 * Should probably do it 'wrong the right way' and use a mapping
 * array for all major device numbers, and map linux_mknod too.
 *
 * With raw == 0 the device number is returned unchanged.  With raw != 0
 * a wscons device (when wsdisplay is configured) is reported with the
 * Linux console major and its minor number plus one; anything else is
 * reported as 0.
 */
dev_t
linux_fakedev(dev, raw)
	dev_t dev;
	int raw;
{
	if (!raw)
		return dev;

#if (NWSDISPLAY > 0)
	if (major(dev) == NETBSD_WSCONS_MAJOR)
		return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
#endif
	return 0;
}
    531 
    532 #if (NWSDISPLAY > 0)
    533 /*
    534  * That's not complete, but enough to get an X server running.
    535  */
    536 #define NR_KEYS 128
    537 static const u_short plain_map[NR_KEYS] = {
    538 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
    539 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
    540 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    541 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
    542 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
    543 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
    544 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
    545 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    546 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
    547 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    548 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
    549 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    550 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    551 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    552 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    553 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    554 }, shift_map[NR_KEYS] = {
    555 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
    556 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
    557 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
    558 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
    559 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
    560 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
    561 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
    562 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
    563 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
    564 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    565 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
    566 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    567 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    568 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
    569 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    570 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    571 }, altgr_map[NR_KEYS] = {
    572 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
    573 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
    574 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    575 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
    576 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
    577 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
    578 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
    579 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
    580 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
    581 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
    582 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
    583 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    584 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    585 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    586 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    587 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    588 }, ctrl_map[NR_KEYS] = {
    589 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
    590 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
    591 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
    592 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
    593 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
    594 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
    595 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
    596 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    597 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
    598 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    599 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
    600 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    601 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    602 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    603 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    604 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    605 };
    606 
    607 const u_short * const linux_keytabs[] = {
    608 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
    609 };
    610 #endif
    611 
    612 static struct biosdisk_info *
    613 fd2biosinfo(p, fp)
    614 	struct proc *p;
    615 	struct file *fp;
    616 {
    617 	struct vnode *vp;
    618 	const char *blkname;
    619 	char diskname[16];
    620 	int i;
    621 	struct nativedisk_info *nip;
    622 	struct disklist *dl = i386_alldisks;
    623 
    624 	if (fp->f_type != DTYPE_VNODE)
    625 		return NULL;
    626 	vp = (struct vnode *)fp->f_data;
    627 
    628 	if (vp->v_type != VBLK)
    629 		return NULL;
    630 
    631 	blkname = findblkname(major(vp->v_rdev));
    632 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    633 	    DISKUNIT(vp->v_rdev));
    634 
    635 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    636 		nip = &dl->dl_nativedisks[i];
    637 		if (strcmp(diskname, nip->ni_devname))
    638 			continue;
    639 		if (nip->ni_nmatches != 0)
    640 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    641 	}
    642 
    643 	return NULL;
    644 }
    645 
    646 
    647 /*
    648  * We come here in a last attempt to satisfy a Linux ioctl() call
    649  */
    650 int
    651 linux_machdepioctl(p, v, retval)
    652 	struct proc *p;
    653 	void *v;
    654 	register_t *retval;
    655 {
    656 	struct linux_sys_ioctl_args /* {
    657 		syscallarg(int) fd;
    658 		syscallarg(u_long) com;
    659 		syscallarg(caddr_t) data;
    660 	} */ *uap = v;
    661 	struct sys_ioctl_args bia;
    662 	u_long com;
    663 	int error, error1;
    664 #if (NWSDISPLAY > 0)
    665 	struct vt_mode lvt;
    666 	caddr_t bvtp, sg;
    667 	struct kbentry kbe;
    668 #endif
    669 	struct linux_hd_geometry hdg;
    670 	struct linux_hd_big_geometry hdg_big;
    671 	struct biosdisk_info *bip;
    672 	struct filedesc *fdp;
    673 	struct file *fp;
    674 	int fd;
    675 	struct disklabel label, *labp;
    676 	struct partinfo partp;
    677 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
    678 	u_long start, biostotal, realtotal;
    679 	u_char heads, sectors;
    680 	u_int cylinders;
    681 	struct ioctl_pt pt;
    682 
    683 	fd = SCARG(uap, fd);
    684 	SCARG(&bia, fd) = fd;
    685 	SCARG(&bia, data) = SCARG(uap, data);
    686 	com = SCARG(uap, com);
    687 
    688 	fdp = p->p_fd;
    689 
    690 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    691 		return (EBADF);
    692 
    693 	switch (com) {
    694 #if (NWSDISPLAY > 0)
    695 	case LINUX_KDGKBMODE:
    696 		com = KDGKBMODE;
    697 		break;
    698 	case LINUX_KDSKBMODE:
    699 		com = KDSKBMODE;
    700 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    701 			SCARG(&bia, data) = (caddr_t)K_RAW;
    702 		break;
    703 	case LINUX_KIOCSOUND:
    704 		SCARG(&bia, data) =
    705 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    706 		/* fall through */
    707 	case LINUX_KDMKTONE:
    708 		com = KDMKTONE;
    709 		break;
    710 	case LINUX_KDSETMODE:
    711 		com = KDSETMODE;
    712 		break;
    713 	case LINUX_KDGETMODE:
    714 		/* KD_* values are equal to the wscons numbers */
    715 		com = WSDISPLAYIO_GMODE;
    716 		break;
    717 	case LINUX_KDENABIO:
    718 		com = KDENABIO;
    719 		break;
    720 	case LINUX_KDDISABIO:
    721 		com = KDDISABIO;
    722 		break;
    723 	case LINUX_KDGETLED:
    724 		com = KDGETLED;
    725 		break;
    726 	case LINUX_KDSETLED:
    727 		com = KDSETLED;
    728 		break;
    729 	case LINUX_VT_OPENQRY:
    730 		com = VT_OPENQRY;
    731 		break;
    732 	case LINUX_VT_GETMODE:
    733 		SCARG(&bia, com) = VT_GETMODE;
    734 		if ((error = sys_ioctl(p, &bia, retval)))
    735 			return error;
    736 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    737 		    sizeof (struct vt_mode))))
    738 			return error;
    739 		lvt.relsig = native_to_linux_sig[lvt.relsig];
    740 		lvt.acqsig = native_to_linux_sig[lvt.acqsig];
    741 		lvt.frsig = native_to_linux_sig[lvt.frsig];
    742 		return copyout((caddr_t)&lvt, SCARG(uap, data),
    743 		    sizeof (struct vt_mode));
    744 	case LINUX_VT_SETMODE:
    745 		com = VT_SETMODE;
    746 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    747 		    sizeof (struct vt_mode))))
    748 			return error;
    749 		lvt.relsig = linux_to_native_sig[lvt.relsig];
    750 		lvt.acqsig = linux_to_native_sig[lvt.acqsig];
    751 		lvt.frsig = linux_to_native_sig[lvt.frsig];
    752 		sg = stackgap_init(p, 0);
    753 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    754 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    755 			return error;
    756 		SCARG(&bia, data) = bvtp;
    757 		break;
    758 	case LINUX_VT_DISALLOCATE:
    759 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    760 		return 0;
    761 	case LINUX_VT_RELDISP:
    762 		com = VT_RELDISP;
    763 		break;
    764 	case LINUX_VT_ACTIVATE:
    765 		com = VT_ACTIVATE;
    766 		break;
    767 	case LINUX_VT_WAITACTIVE:
    768 		com = VT_WAITACTIVE;
    769 		break;
    770 	case LINUX_VT_GETSTATE:
    771 		com = VT_GETSTATE;
    772 		break;
    773 	case LINUX_KDGKBTYPE:
    774 		/* This is what Linux does. */
    775 		return (subyte(SCARG(uap, data), KB_101));
    776 	case LINUX_KDGKBENT:
    777 		/*
    778 		 * The Linux KDGKBENT ioctl is different from the
    779 		 * SYSV original. So we handle it in machdep code.
    780 		 * XXX We should use keyboard mapping information
    781 		 * from wsdisplay, but this would be expensive.
    782 		 */
    783 		if ((error = copyin(SCARG(uap, data), &kbe,
    784 				    sizeof(struct kbentry))))
    785 			return (error);
    786 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    787 		    || kbe.kb_index >= NR_KEYS)
    788 			return (EINVAL);
    789 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    790 		return (copyout(&kbe, SCARG(uap, data),
    791 				sizeof(struct kbentry)));
    792 #endif
    793 	case LINUX_HDIO_GETGEO:
    794 	case LINUX_HDIO_GETGEO_BIG:
    795 		/*
    796 		 * Try to mimic Linux behaviour: return the BIOS geometry
    797 		 * if possible (extending its # of cylinders if it's beyond
    798 		 * the 1023 limit), fall back to the MI geometry (i.e.
    799 		 * the real geometry) if not found, by returning an
    800 		 * error. See common/linux_hdio.c
    801 		 */
    802 		FILE_USE(fp);
    803 		bip = fd2biosinfo(p, fp);
    804 		ioctlf = fp->f_ops->fo_ioctl;
    805 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
    806 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
    807 		FILE_UNUSE(fp, p);
    808 		if (error != 0 && error1 != 0)
    809 			return error1;
    810 		labp = error != 0 ? &label : partp.disklab;
    811 		start = error1 != 0 ? partp.part->p_offset : 0;
    812 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    813 		    && bip->bi_cyl != 0) {
    814 			heads = bip->bi_head;
    815 			sectors = bip->bi_sec;
    816 			cylinders = bip->bi_cyl;
    817 			biostotal = heads * sectors * cylinders;
    818 			realtotal = labp->d_ntracks * labp->d_nsectors *
    819 			    labp->d_ncylinders;
    820 			if (realtotal > biostotal)
    821 				cylinders = realtotal / (heads * sectors);
    822 		} else {
    823 			heads = labp->d_ntracks;
    824 			cylinders = labp->d_ncylinders;
    825 			sectors = labp->d_nsectors;
    826 		}
    827 		if (com == LINUX_HDIO_GETGEO) {
    828 			hdg.start = start;
    829 			hdg.heads = heads;
    830 			hdg.cylinders = cylinders;
    831 			hdg.sectors = sectors;
    832 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
    833 		} else {
    834 			hdg_big.start = start;
    835 			hdg_big.heads = heads;
    836 			hdg_big.cylinders = cylinders;
    837 			hdg_big.sectors = sectors;
    838 			return copyout(&hdg_big, SCARG(uap, data),
    839 			    sizeof hdg_big);
    840 		}
    841 		return 0;
    842 
    843 	default:
    844 		/*
    845 		 * Unknown to us. If it's on a device, just pass it through
    846 		 * using PTIOCLINUX, the device itself might be able to
    847 		 * make some sense of it.
    848 		 * XXX hack: if the function returns EJUSTRETURN,
    849 		 * it has stuffed a sysctl return value in pt.data.
    850 		 */
    851 		FILE_USE(fp);
    852 		ioctlf = fp->f_ops->fo_ioctl;
    853 		pt.com = SCARG(uap, com);
    854 		pt.data = SCARG(uap, data);
    855 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
    856 		FILE_UNUSE(fp, p);
    857 		if (error == EJUSTRETURN) {
    858 			retval[0] = (register_t)pt.data;
    859 			error = 0;
    860 		}
    861 
    862 		if (error == ENOTTY)
    863 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
    864 			    com));
    865 		return error;
    866 	}
    867 	SCARG(&bia, com) = com;
    868 	return sys_ioctl(p, &bia, retval);
    869 }
    870 
    871 /*
    872  * Set I/O permissions for a process. Just set the maximum level
    873  * right away (ignoring the argument), otherwise we would have
    874  * to rely on I/O permission maps, which are not implemented.
    875  */
    876 int
    877 linux_sys_iopl(p, v, retval)
    878 	struct proc *p;
    879 	void *v;
    880 	register_t *retval;
    881 {
    882 #if 0
    883 	struct linux_sys_iopl_args /* {
    884 		syscallarg(int) level;
    885 	} */ *uap = v;
    886 #endif
    887 	struct trapframe *fp = p->p_md.md_regs;
    888 
    889 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    890 		return EPERM;
    891 	fp->tf_eflags |= PSL_IOPL;
    892 	*retval = 0;
    893 	return 0;
    894 }
    895 
    896 /*
    897  * See above. If a root process tries to set access to an I/O port,
    898  * just let it have the whole range.
    899  */
    900 int
    901 linux_sys_ioperm(p, v, retval)
    902 	struct proc *p;
    903 	void *v;
    904 	register_t *retval;
    905 {
    906 	struct linux_sys_ioperm_args /* {
    907 		syscallarg(unsigned int) lo;
    908 		syscallarg(unsigned int) hi;
    909 		syscallarg(int) val;
    910 	} */ *uap = v;
    911 	struct trapframe *fp = p->p_md.md_regs;
    912 
    913 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    914 		return EPERM;
    915 	if (SCARG(uap, val))
    916 		fp->tf_eflags |= PSL_IOPL;
    917 	*retval = 0;
    918 	return 0;
    919 }
    920