Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.66
      1 /*	$NetBSD: linux_machdep.c,v 1.66 2001/07/15 20:02:21 jdolecek Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #if defined(_KERNEL_OPT)
     40 #include "opt_vm86.h"
     41 #include "opt_user_ldt.h"
     42 #endif
     43 
     44 #include <sys/param.h>
     45 #include <sys/systm.h>
     46 #include <sys/signalvar.h>
     47 #include <sys/kernel.h>
     48 #include <sys/map.h>
     49 #include <sys/proc.h>
     50 #include <sys/user.h>
     51 #include <sys/buf.h>
     52 #include <sys/reboot.h>
     53 #include <sys/conf.h>
     54 #include <sys/exec.h>
     55 #include <sys/file.h>
     56 #include <sys/callout.h>
     57 #include <sys/malloc.h>
     58 #include <sys/mbuf.h>
     59 #include <sys/msgbuf.h>
     60 #include <sys/mount.h>
     61 #include <sys/vnode.h>
     62 #include <sys/device.h>
     63 #include <sys/syscallargs.h>
     64 #include <sys/filedesc.h>
     65 #include <sys/exec_elf.h>
     66 #include <sys/disklabel.h>
     67 #include <sys/ioctl.h>
     68 #include <miscfs/specfs/specdev.h>
     69 
     70 #include <compat/linux/common/linux_types.h>
     71 #include <compat/linux/common/linux_signal.h>
     72 #include <compat/linux/common/linux_util.h>
     73 #include <compat/linux/common/linux_ioctl.h>
     74 #include <compat/linux/common/linux_hdio.h>
     75 #include <compat/linux/common/linux_exec.h>
     76 #include <compat/linux/common/linux_machdep.h>
     77 
     78 #include <compat/linux/linux_syscallargs.h>
     79 
     80 #include <machine/cpu.h>
     81 #include <machine/cpufunc.h>
     82 #include <machine/psl.h>
     83 #include <machine/reg.h>
     84 #include <machine/segments.h>
     85 #include <machine/specialreg.h>
     86 #include <machine/sysarch.h>
     87 #include <machine/vm86.h>
     88 #include <machine/vmparam.h>
     89 
     90 /*
     91  * To see whether wscons is configured (for virtual console ioctl calls).
     92  */
     93 #if defined(_KERNEL_OPT)
     94 #include "wsdisplay.h"
     95 #endif
     96 #if (NWSDISPLAY > 0)
     97 #include <dev/wscons/wsconsio.h>
     98 #include <dev/wscons/wsdisplay_usl_io.h>
     99 #if defined(_KERNEL_OPT)
    100 #include "opt_xserver.h"
    101 #endif
    102 #endif
    103 
    104 #ifdef USER_LDT
    105 #include <machine/cpu.h>
    106 int linux_read_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
    107     register_t *));
    108 int linux_write_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
    109     register_t *));
    110 #endif
    111 
    112 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    113 extern struct disklist *i386_alldisks;
    114 extern const char *findblkname __P((int));
    115 
    116 /*
    117  * Deal with some i386-specific things in the Linux emulation code.
    118  */
    119 
    120 void
    121 linux_setregs(p, epp, stack)
    122 	struct proc *p;
    123 	struct exec_package *epp;
    124 	u_long stack;
    125 {
    126 	struct pcb *pcb = &p->p_addr->u_pcb;
    127 
    128 	setregs(p, epp, stack);
    129 	pcb->pcb_savefpu.sv_env.en_cw = __Linux_NPXCW__;
    130 }
    131 
    132 /*
    133  * Send an interrupt to process.
    134  *
    135  * Stack is set up to allow sigcode stored
    136  * in u. to call routine, followed by kcall
    137  * to sigreturn routine below.  After sigreturn
    138  * resets the signal mask, the stack, and the
    139  * frame pointer, it returns to the user
    140  * specified pc, psl.
    141  */
    142 
    143 void
    144 linux_sendsig(catcher, sig, mask, code)
    145 	sig_t catcher;
    146 	int sig;
    147 	sigset_t *mask;
    148 	u_long code;
    149 {
    150 	struct proc *p = curproc;
    151 	struct trapframe *tf;
    152 	struct linux_sigframe *fp, frame;
    153 	int onstack;
    154 
    155 	tf = p->p_md.md_regs;
    156 
    157 	/* Do we need to jump onto the signal stack? */
    158 	onstack =
    159 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    160 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    161 
    162 	/* Allocate space for the signal handler context. */
    163 	if (onstack)
    164 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
    165 					  p->p_sigctx.ps_sigstk.ss_size);
    166 	else
    167 		fp = (struct linux_sigframe *)tf->tf_esp;
    168 	fp--;
    169 
    170 	/* Build stack frame for signal trampoline. */
    171 	frame.sf_handler = catcher;
    172 	frame.sf_sig = native_to_linux_sig[sig];
    173 
    174 	/* Save register context. */
    175 #ifdef VM86
    176 	if (tf->tf_eflags & PSL_VM) {
    177 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
    178 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
    179 		frame.sf_sc.sc_es = tf->tf_vm86_es;
    180 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
    181 		frame.sf_sc.sc_eflags = get_vflags(p);
    182 	} else
    183 #endif
    184 	{
    185 		frame.sf_sc.sc_gs = tf->tf_gs;
    186 		frame.sf_sc.sc_fs = tf->tf_fs;
    187 		frame.sf_sc.sc_es = tf->tf_es;
    188 		frame.sf_sc.sc_ds = tf->tf_ds;
    189 		frame.sf_sc.sc_eflags = tf->tf_eflags;
    190 	}
    191 	frame.sf_sc.sc_edi = tf->tf_edi;
    192 	frame.sf_sc.sc_esi = tf->tf_esi;
    193 	frame.sf_sc.sc_ebp = tf->tf_ebp;
    194 	frame.sf_sc.sc_ebx = tf->tf_ebx;
    195 	frame.sf_sc.sc_edx = tf->tf_edx;
    196 	frame.sf_sc.sc_ecx = tf->tf_ecx;
    197 	frame.sf_sc.sc_eax = tf->tf_eax;
    198 	frame.sf_sc.sc_eip = tf->tf_eip;
    199 	frame.sf_sc.sc_cs = tf->tf_cs;
    200 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
    201 	frame.sf_sc.sc_ss = tf->tf_ss;
    202 	frame.sf_sc.sc_err = tf->tf_err;
    203 	frame.sf_sc.sc_trapno = tf->tf_trapno;
    204 
    205 	/* Save signal stack. */
    206 	/* Linux doesn't save the onstack flag in sigframe */
    207 
    208 	/* Save signal mask. */
    209 	native_to_linux_old_sigset(mask, &frame.sf_sc.sc_mask);
    210 
    211 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    212 		/*
    213 		 * Process has trashed its stack; give it an illegal
    214 		 * instruction to halt it in its tracks.
    215 		 */
    216 		sigexit(p, SIGILL);
    217 		/* NOTREACHED */
    218 	}
    219 
    220 	/*
    221 	 * Build context to run handler in.
    222 	 */
    223 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    224 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    225 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    226 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    227 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    228 	tf->tf_esp = (int)fp;
    229 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    230 
    231 	/* Remember that we're now on the signal stack. */
    232 	if (onstack)
    233 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    234 }
    235 
    236 /*
    237  * System call to cleanup state after a signal
    238  * has been taken.  Reset signal mask and
    239  * stack state from context left by sendsig (above).
    240  * Return to previous pc and psl as specified by
    241  * context left by sendsig. Check carefully to
    242  * make sure that the user has not modified the
    243  * psl to gain improper privileges or to cause
    244  * a machine fault.
    245  */
    246 int
    247 linux_sys_rt_sigreturn(p, v, retval)
    248 	struct proc *p;
    249 	void *v;
    250 	register_t *retval;
    251 {
    252 	/* XXX XAX write me */
    253 	return(ENOSYS);
    254 }
    255 
    256 int
    257 linux_sys_sigreturn(p, v, retval)
    258 	struct proc *p;
    259 	void *v;
    260 	register_t *retval;
    261 {
    262 	struct linux_sys_sigreturn_args /* {
    263 		syscallarg(struct linux_sigcontext *) scp;
    264 	} */ *uap = v;
    265 	struct linux_sigcontext *scp, context;
    266 	struct trapframe *tf;
    267 	sigset_t mask;
    268 	ssize_t ss_gap;
    269 
    270 	/*
    271 	 * The trampoline code hands us the context.
    272 	 * It is unsafe to keep track of it ourselves, in the event that a
    273 	 * program jumps out of a signal handler.
    274 	 */
    275 	scp = SCARG(uap, scp);
    276 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
    277 		return (EFAULT);
    278 
    279 	/* Restore register context. */
    280 	tf = p->p_md.md_regs;
    281 #ifdef VM86
    282 	if (context.sc_eflags & PSL_VM) {
    283 		tf->tf_vm86_gs = context.sc_gs;
    284 		tf->tf_vm86_fs = context.sc_fs;
    285 		tf->tf_vm86_es = context.sc_es;
    286 		tf->tf_vm86_ds = context.sc_ds;
    287 		set_vflags(p, context.sc_eflags);
    288 	} else
    289 #endif
    290 	{
    291 		/*
    292 		 * Check for security violations.  If we're returning to
    293 		 * protected mode, the CPU will validate the segment registers
    294 		 * automatically and generate a trap on violations.  We handle
    295 		 * the trap, rather than doing all of the checking here.
    296 		 */
    297 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    298 		    !USERMODE(context.sc_cs, context.sc_eflags))
    299 			return (EINVAL);
    300 
    301 		/* %fs and %gs were restored by the trampoline. */
    302 		tf->tf_es = context.sc_es;
    303 		tf->tf_ds = context.sc_ds;
    304 		tf->tf_eflags = context.sc_eflags;
    305 	}
    306 	tf->tf_edi = context.sc_edi;
    307 	tf->tf_esi = context.sc_esi;
    308 	tf->tf_ebp = context.sc_ebp;
    309 	tf->tf_ebx = context.sc_ebx;
    310 	tf->tf_edx = context.sc_edx;
    311 	tf->tf_ecx = context.sc_ecx;
    312 	tf->tf_eax = context.sc_eax;
    313 	tf->tf_eip = context.sc_eip;
    314 	tf->tf_cs = context.sc_cs;
    315 	tf->tf_esp = context.sc_esp_at_signal;
    316 	tf->tf_ss = context.sc_ss;
    317 
    318 	/* Restore signal stack. */
    319 	/*
    320 	 * Linux really does it this way; it doesn't have space in sigframe
    321 	 * to save the onstack flag.
    322 	 */
    323 	ss_gap = (ssize_t)
    324 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
    325 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
    326 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    327 	else
    328 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
    329 
    330 	/* Restore signal mask. */
    331 	linux_old_to_native_sigset(&context.sc_mask, &mask);
    332 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    333 
    334 	return (EJUSTRETURN);
    335 }
    336 
    337 #ifdef USER_LDT
    338 
    339 int
    340 linux_read_ldt(p, uap, retval)
    341 	struct proc *p;
    342 	struct linux_sys_modify_ldt_args /* {
    343 		syscallarg(int) func;
    344 		syscallarg(void *) ptr;
    345 		syscallarg(size_t) bytecount;
    346 	} */ *uap;
    347 	register_t *retval;
    348 {
    349 	struct i386_get_ldt_args gl;
    350 	int error;
    351 	caddr_t sg;
    352 	char *parms;
    353 
    354 	sg = stackgap_init(p->p_emul);
    355 
    356 	gl.start = 0;
    357 	gl.desc = SCARG(uap, ptr);
    358 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    359 
    360 	parms = stackgap_alloc(&sg, sizeof(gl));
    361 
    362 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    363 		return (error);
    364 
    365 	if ((error = i386_get_ldt(p, parms, retval)) != 0)
    366 		return (error);
    367 
    368 	*retval *= sizeof(union descriptor);
    369 	return (0);
    370 }
    371 
    372 struct linux_ldt_info {
    373 	u_int entry_number;
    374 	u_long base_addr;
    375 	u_int limit;
    376 	u_int seg_32bit:1;
    377 	u_int contents:2;
    378 	u_int read_exec_only:1;
    379 	u_int limit_in_pages:1;
    380 	u_int seg_not_present:1;
    381 };
    382 
    383 int
    384 linux_write_ldt(p, uap, retval)
    385 	struct proc *p;
    386 	struct linux_sys_modify_ldt_args /* {
    387 		syscallarg(int) func;
    388 		syscallarg(void *) ptr;
    389 		syscallarg(size_t) bytecount;
    390 	} */ *uap;
    391 	register_t *retval;
    392 {
    393 	struct linux_ldt_info ldt_info;
    394 	struct segment_descriptor sd;
    395 	struct i386_set_ldt_args sl;
    396 	int error;
    397 	caddr_t sg;
    398 	char *parms;
    399 
    400 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    401 		return (EINVAL);
    402 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    403 		return error;
    404 	if (ldt_info.contents == 3)
    405 		return (EINVAL);
    406 
    407 	sg = stackgap_init(p->p_emul);
    408 
    409 	sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    410 	sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    411 	sd.sd_lolimit = ldt_info.limit & 0xffff;
    412 	sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    413 	sd.sd_type =
    414 	    16 | (ldt_info.contents << 2) | (!ldt_info.read_exec_only << 1);
    415 	sd.sd_dpl = SEL_UPL;
    416 	sd.sd_p = !ldt_info.seg_not_present;
    417 	sd.sd_def32 = ldt_info.seg_32bit;
    418 	sd.sd_gran = ldt_info.limit_in_pages;
    419 
    420 	sl.start = ldt_info.entry_number;
    421 	sl.desc = stackgap_alloc(&sg, sizeof(sd));
    422 	sl.num = 1;
    423 
    424 #if 0
    425 	printf("linux_write_ldt: idx=%d, base=%x, limit=%x\n",
    426 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit);
    427 #endif
    428 
    429 	parms = stackgap_alloc(&sg, sizeof(sl));
    430 
    431 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    432 		return (error);
    433 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    434 		return (error);
    435 
    436 	if ((error = i386_set_ldt(p, parms, retval)) != 0)
    437 		return (error);
    438 
    439 	*retval = 0;
    440 	return (0);
    441 }
    442 
    443 #endif /* USER_LDT */
    444 
    445 int
    446 linux_sys_modify_ldt(p, v, retval)
    447 	struct proc *p;
    448 	void *v;
    449 	register_t *retval;
    450 {
    451 	struct linux_sys_modify_ldt_args /* {
    452 		syscallarg(int) func;
    453 		syscallarg(void *) ptr;
    454 		syscallarg(size_t) bytecount;
    455 	} */ *uap = v;
    456 
    457 	switch (SCARG(uap, func)) {
    458 #ifdef USER_LDT
    459 	case 0:
    460 		return (linux_read_ldt(p, uap, retval));
    461 
    462 	case 1:
    463 		return (linux_write_ldt(p, uap, retval));
    464 #endif /* USER_LDT */
    465 
    466 	default:
    467 		return (ENOSYS);
    468 	}
    469 }
    470 
    471 /*
    472  * XXX Pathetic hack to make svgalib work. This will fake the major
    473  * device number of an opened VT so that svgalib likes it. grmbl.
    474  * Should probably do it 'wrong the right way' and use a mapping
    475  * array for all major device numbers, and map linux_mknod too.
    476  */
    477 dev_t
    478 linux_fakedev(dev)
    479 	dev_t dev;
    480 {
    481 #if (NWSDISPLAY > 0)
    482 	if (major(dev) == NETBSD_WSCONS_MAJOR)
    483 		return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    484 #endif
    485 	return dev;
    486 }
    487 
    488 #if (NWSDISPLAY > 0)
    489 /*
    490  * That's not complete, but enough to get an X server running.
    491  */
    492 #define NR_KEYS 128
    493 static const u_short plain_map[NR_KEYS] = {
    494 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
    495 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
    496 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    497 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
    498 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
    499 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
    500 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
    501 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    502 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
    503 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    504 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
    505 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    506 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    507 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    508 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    509 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    510 }, shift_map[NR_KEYS] = {
    511 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
    512 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
    513 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
    514 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
    515 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
    516 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
    517 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
    518 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
    519 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
    520 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    521 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
    522 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    523 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    524 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
    525 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    526 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    527 }, altgr_map[NR_KEYS] = {
    528 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
    529 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
    530 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    531 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
    532 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
    533 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
    534 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
    535 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
    536 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
    537 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
    538 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
    539 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    540 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    541 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    542 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    543 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    544 }, ctrl_map[NR_KEYS] = {
    545 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
    546 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
    547 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
    548 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
    549 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
    550 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
    551 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
    552 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    553 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
    554 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    555 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
    556 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    557 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    558 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    559 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    560 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    561 };
    562 
    563 const u_short * const linux_keytabs[] = {
    564 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
    565 };
    566 #endif
    567 
    568 static struct biosdisk_info *
    569 fd2biosinfo(p, fp)
    570 	struct proc *p;
    571 	struct file *fp;
    572 {
    573 	struct vnode *vp;
    574 	const char *blkname;
    575 	char diskname[16];
    576 	int i;
    577 	struct nativedisk_info *nip;
    578 	struct disklist *dl = i386_alldisks;
    579 
    580 	if (fp->f_type != DTYPE_VNODE)
    581 		return NULL;
    582 	vp = (struct vnode *)fp->f_data;
    583 
    584 	if (vp->v_type != VBLK)
    585 		return NULL;
    586 
    587 	blkname = findblkname(major(vp->v_rdev));
    588 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    589 	    DISKUNIT(vp->v_rdev));
    590 
    591 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    592 		nip = &dl->dl_nativedisks[i];
    593 		if (strcmp(diskname, nip->ni_devname))
    594 			continue;
    595 		if (nip->ni_nmatches != 0)
    596 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    597 	}
    598 
    599 	return NULL;
    600 }
    601 
    602 
    603 /*
    604  * We come here in a last attempt to satisfy a Linux ioctl() call
    605  */
    606 int
    607 linux_machdepioctl(p, v, retval)
    608 	struct proc *p;
    609 	void *v;
    610 	register_t *retval;
    611 {
    612 	struct linux_sys_ioctl_args /* {
    613 		syscallarg(int) fd;
    614 		syscallarg(u_long) com;
    615 		syscallarg(caddr_t) data;
    616 	} */ *uap = v;
    617 	struct sys_ioctl_args bia;
    618 	u_long com;
    619 	int error, error1;
    620 #if (NWSDISPLAY > 0)
    621 	struct vt_mode lvt;
    622 	caddr_t bvtp, sg;
    623 	struct kbentry kbe;
    624 #endif
    625 	struct linux_hd_geometry hdg;
    626 	struct linux_hd_big_geometry hdg_big;
    627 	struct biosdisk_info *bip;
    628 	struct filedesc *fdp;
    629 	struct file *fp;
    630 	int fd;
    631 	struct disklabel label, *labp;
    632 	struct partinfo partp;
    633 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
    634 	u_long start, biostotal, realtotal;
    635 	u_char heads, sectors;
    636 	u_int cylinders;
    637 	struct ioctl_pt pt;
    638 
    639 	fd = SCARG(uap, fd);
    640 	SCARG(&bia, fd) = fd;
    641 	SCARG(&bia, data) = SCARG(uap, data);
    642 	com = SCARG(uap, com);
    643 
    644 	fdp = p->p_fd;
    645 
    646 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    647 		return (EBADF);
    648 
    649 	switch (com) {
    650 #if (NWSDISPLAY > 0)
    651 	case LINUX_KDGKBMODE:
    652 		com = KDGKBMODE;
    653 		break;
    654 	case LINUX_KDSKBMODE:
    655 		com = KDSKBMODE;
    656 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    657 			SCARG(&bia, data) = (caddr_t)K_RAW;
    658 		break;
    659 	case LINUX_KIOCSOUND:
    660 		SCARG(&bia, data) =
    661 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    662 		/* fall through */
    663 	case LINUX_KDMKTONE:
    664 		com = KDMKTONE;
    665 		break;
    666 	case LINUX_KDSETMODE:
    667 		com = KDSETMODE;
    668 		break;
    669 	case LINUX_KDGETMODE:
    670 		/* KD_* values are equal to the wscons numbers */
    671 		com = WSDISPLAYIO_GMODE;
    672 		break;
    673 	case LINUX_KDENABIO:
    674 		com = KDENABIO;
    675 		break;
    676 	case LINUX_KDDISABIO:
    677 		com = KDDISABIO;
    678 		break;
    679 	case LINUX_KDGETLED:
    680 		com = KDGETLED;
    681 		break;
    682 	case LINUX_KDSETLED:
    683 		com = KDSETLED;
    684 		break;
    685 	case LINUX_VT_OPENQRY:
    686 		com = VT_OPENQRY;
    687 		break;
    688 	case LINUX_VT_GETMODE:
    689 		SCARG(&bia, com) = VT_GETMODE;
    690 		if ((error = sys_ioctl(p, &bia, retval)))
    691 			return error;
    692 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    693 		    sizeof (struct vt_mode))))
    694 			return error;
    695 		lvt.relsig = native_to_linux_sig[lvt.relsig];
    696 		lvt.acqsig = native_to_linux_sig[lvt.acqsig];
    697 		lvt.frsig = native_to_linux_sig[lvt.frsig];
    698 		return copyout((caddr_t)&lvt, SCARG(uap, data),
    699 		    sizeof (struct vt_mode));
    700 	case LINUX_VT_SETMODE:
    701 		com = VT_SETMODE;
    702 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    703 		    sizeof (struct vt_mode))))
    704 			return error;
    705 		lvt.relsig = linux_to_native_sig[lvt.relsig];
    706 		lvt.acqsig = linux_to_native_sig[lvt.acqsig];
    707 		lvt.frsig = linux_to_native_sig[lvt.frsig];
    708 		sg = stackgap_init(p->p_emul);
    709 		bvtp = stackgap_alloc(&sg, sizeof (struct vt_mode));
    710 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    711 			return error;
    712 		SCARG(&bia, data) = bvtp;
    713 		break;
    714 	case LINUX_VT_DISALLOCATE:
    715 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    716 		return 0;
    717 	case LINUX_VT_RELDISP:
    718 		com = VT_RELDISP;
    719 		break;
    720 	case LINUX_VT_ACTIVATE:
    721 		com = VT_ACTIVATE;
    722 		break;
    723 	case LINUX_VT_WAITACTIVE:
    724 		com = VT_WAITACTIVE;
    725 		break;
    726 	case LINUX_VT_GETSTATE:
    727 		com = VT_GETSTATE;
    728 		break;
    729 	case LINUX_KDGKBTYPE:
    730 		/* This is what Linux does. */
    731 		return (subyte(SCARG(uap, data), KB_101));
    732 	case LINUX_KDGKBENT:
    733 		/*
    734 		 * The Linux KDGKBENT ioctl is different from the
    735 		 * SYSV original. So we handle it in machdep code.
    736 		 * XXX We should use keyboard mapping information
    737 		 * from wsdisplay, but this would be expensive.
    738 		 */
    739 		if ((error = copyin(SCARG(uap, data), &kbe,
    740 				    sizeof(struct kbentry))))
    741 			return (error);
    742 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    743 		    || kbe.kb_index >= NR_KEYS)
    744 			return (EINVAL);
    745 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    746 		return (copyout(&kbe, SCARG(uap, data),
    747 				sizeof(struct kbentry)));
    748 #endif
    749 	case LINUX_HDIO_GETGEO:
    750 	case LINUX_HDIO_GETGEO_BIG:
    751 		/*
    752 		 * Try to mimic Linux behaviour: return the BIOS geometry
    753 		 * if possible (extending its # of cylinders if it's beyond
    754 		 * the 1023 limit), fall back to the MI geometry (i.e.
    755 		 * the real geometry) if not found, by returning an
    756 		 * error. See common/linux_hdio.c
    757 		 */
    758 		FILE_USE(fp);
    759 		bip = fd2biosinfo(p, fp);
    760 		ioctlf = fp->f_ops->fo_ioctl;
    761 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
    762 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
    763 		FILE_UNUSE(fp, p);
    764 		if (error != 0 && error1 != 0)
    765 			return error1;
    766 		labp = error != 0 ? &label : partp.disklab;
    767 		start = error1 != 0 ? partp.part->p_offset : 0;
    768 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    769 		    && bip->bi_cyl != 0) {
    770 			heads = bip->bi_head;
    771 			sectors = bip->bi_sec;
    772 			cylinders = bip->bi_cyl;
    773 			biostotal = heads * sectors * cylinders;
    774 			realtotal = labp->d_ntracks * labp->d_nsectors *
    775 			    labp->d_ncylinders;
    776 			if (realtotal > biostotal)
    777 				cylinders = realtotal / (heads * sectors);
    778 		} else {
    779 			heads = labp->d_ntracks;
    780 			cylinders = labp->d_ncylinders;
    781 			sectors = labp->d_nsectors;
    782 		}
    783 		if (com == LINUX_HDIO_GETGEO) {
    784 			hdg.start = start;
    785 			hdg.heads = heads;
    786 			hdg.cylinders = cylinders;
    787 			hdg.sectors = sectors;
    788 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
    789 		} else {
    790 			hdg_big.start = start;
    791 			hdg_big.heads = heads;
    792 			hdg_big.cylinders = cylinders;
    793 			hdg_big.sectors = sectors;
    794 			return copyout(&hdg_big, SCARG(uap, data),
    795 			    sizeof hdg_big);
    796 		}
    797 		return 0;
    798 
    799 	default:
    800 		/*
    801 		 * Unknown to us. If it's on a device, just pass it through
    802 		 * using PTIOCLINUX, the device itself might be able to
    803 		 * make some sense of it.
    804 		 * XXX hack: if the function returns EJUSTRETURN,
    805 		 * it has stuffed a sysctl return value in pt.data.
    806 		 */
    807 		FILE_USE(fp);
    808 		ioctlf = fp->f_ops->fo_ioctl;
    809 		pt.com = SCARG(uap, com);
    810 		pt.data = SCARG(uap, data);
    811 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
    812 		FILE_UNUSE(fp, p);
    813 		if (error == EJUSTRETURN) {
    814 			retval[0] = (register_t)pt.data;
    815 			error = 0;
    816 		}
    817 
    818 		if (error == ENOTTY)
    819 			printf("linux_machdepioctl: invalid ioctl %08lx\n",
    820 			    com);
    821 		return error;
    822 	}
    823 	SCARG(&bia, com) = com;
    824 	return sys_ioctl(p, &bia, retval);
    825 }
    826 
    827 /*
    828  * Set I/O permissions for a process. Just set the maximum level
    829  * right away (ignoring the argument), otherwise we would have
    830  * to rely on I/O permission maps, which are not implemented.
    831  */
    832 int
    833 linux_sys_iopl(p, v, retval)
    834 	struct proc *p;
    835 	void *v;
    836 	register_t *retval;
    837 {
    838 #if 0
    839 	struct linux_sys_iopl_args /* {
    840 		syscallarg(int) level;
    841 	} */ *uap = v;
    842 #endif
    843 	struct trapframe *fp = p->p_md.md_regs;
    844 
    845 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    846 		return EPERM;
    847 	fp->tf_eflags |= PSL_IOPL;
    848 	*retval = 0;
    849 	return 0;
    850 }
    851 
    852 /*
    853  * See above. If a root process tries to set access to an I/O port,
    854  * just let it have the whole range.
    855  */
    856 int
    857 linux_sys_ioperm(p, v, retval)
    858 	struct proc *p;
    859 	void *v;
    860 	register_t *retval;
    861 {
    862 	struct linux_sys_ioperm_args /* {
    863 		syscallarg(unsigned int) lo;
    864 		syscallarg(unsigned int) hi;
    865 		syscallarg(int) val;
    866 	} */ *uap = v;
    867 	struct trapframe *fp = p->p_md.md_regs;
    868 
    869 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    870 		return EPERM;
    871 	if (SCARG(uap, val))
    872 		fp->tf_eflags |= PSL_IOPL;
    873 	*retval = 0;
    874 	return 0;
    875 }
    876