Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.62.2.1
      1 /*	$NetBSD: linux_machdep.c,v 1.62.2.1 2001/03/05 22:49:23 nathanw Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
/*
 * Pull in the VM86 and USER_LDT kernel option headers, but only when this
 * file is built as part of the kernel proper (not as an LKM).  The guard
 * has to test _KERNEL -- the same symbol the wsdisplay.h / opt_xserver.h
 * guards in this file use -- otherwise the option headers are never
 * included and the #ifdef VM86 / #ifdef USER_LDT code is silently dropped.
 */
#if defined(_KERNEL) && !defined(_LKM)
#include "opt_vm86.h"
#include "opt_user_ldt.h"
#endif
     43 
     44 #include <sys/param.h>
     45 #include <sys/systm.h>
     46 #include <sys/signalvar.h>
     47 #include <sys/kernel.h>
     48 #include <sys/map.h>
     49 #include <sys/lwp.h>
     50 #include <sys/proc.h>
     51 #include <sys/user.h>
     52 #include <sys/buf.h>
     53 #include <sys/reboot.h>
     54 #include <sys/conf.h>
     55 #include <sys/exec.h>
     56 #include <sys/file.h>
     57 #include <sys/callout.h>
     58 #include <sys/malloc.h>
     59 #include <sys/mbuf.h>
     60 #include <sys/msgbuf.h>
     61 #include <sys/mount.h>
     62 #include <sys/vnode.h>
     63 #include <sys/device.h>
     64 #include <sys/syscallargs.h>
     65 #include <sys/filedesc.h>
     66 #include <sys/exec_elf.h>
     67 #include <sys/disklabel.h>
     68 #include <sys/ioctl.h>
     69 #include <miscfs/specfs/specdev.h>
     70 
     71 #include <compat/linux/common/linux_types.h>
     72 #include <compat/linux/common/linux_signal.h>
     73 #include <compat/linux/common/linux_util.h>
     74 #include <compat/linux/common/linux_ioctl.h>
     75 #include <compat/linux/common/linux_hdio.h>
     76 #include <compat/linux/common/linux_exec.h>
     77 #include <compat/linux/common/linux_machdep.h>
     78 
     79 #include <compat/linux/linux_syscallargs.h>
     80 
     81 #include <machine/cpu.h>
     82 #include <machine/cpufunc.h>
     83 #include <machine/psl.h>
     84 #include <machine/reg.h>
     85 #include <machine/segments.h>
     86 #include <machine/specialreg.h>
     87 #include <machine/sysarch.h>
     88 #include <machine/vm86.h>
     89 #include <machine/vmparam.h>
     90 
     91 /*
     92  * To see whether wscons is configured (for virtual console ioctl calls).
     93  */
     94 #if defined(_KERNEL) && !defined(_LKM)
     95 #include "wsdisplay.h"
     96 #endif
     97 #if (NWSDISPLAY > 0)
     98 #include <dev/wscons/wsconsio.h>
     99 #include <dev/wscons/wsdisplay_usl_io.h>
    100 #if defined(_KERNEL) && !defined(_LKM)
    101 #include "opt_xserver.h"
    102 #endif
    103 #endif
    104 
    105 #ifdef USER_LDT
    106 #include <machine/cpu.h>
    107 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    108     register_t *));
    109 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    110     register_t *));
    111 #endif
    112 
    113 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    114 extern struct disklist *i386_alldisks;
    115 extern const char *findblkname __P((int));
    116 
    117 /*
    118  * Deal with some i386-specific things in the Linux emulation code.
    119  */
    120 
    121 void
    122 linux_setregs(l, epp, stack)
    123 	struct lwp *l;
    124 	struct exec_package *epp;
    125 	u_long stack;
    126 {
    127 	struct pcb *pcb = &l->l_addr->u_pcb;
    128 
    129 	setregs(l, epp, stack);
    130 	pcb->pcb_savefpu.sv_env.en_cw = __Linux_NPXCW__;
    131 }
    132 
    133 /*
    134  * Send an interrupt to process.
    135  *
    136  * Stack is set up to allow sigcode stored
    137  * in u. to call routine, followed by kcall
    138  * to sigreturn routine below.  After sigreturn
    139  * resets the signal mask, the stack, and the
    140  * frame pointer, it returns to the user
    141  * specified pc, psl.
    142  */
    143 
    144 void
    145 linux_sendsig(catcher, sig, mask, code)
    146 	sig_t catcher;
    147 	int sig;
    148 	sigset_t *mask;
    149 	u_long code;
    150 {
    151 	struct lwp *l = curproc;
    152 	struct proc *p = l->l_proc;
    153 	struct trapframe *tf;
    154 	struct linux_sigframe *fp, frame;
    155 
    156 	tf = l->l_md.md_regs;
    157 
    158 	/* Allocate space for the signal handler context. */
    159 	/* XXX Linux doesn't support the signal stack. */
    160 	fp = (struct linux_sigframe *)tf->tf_esp;
    161 	fp--;
    162 
    163 	/* Build stack frame for signal trampoline. */
    164 	frame.sf_handler = catcher;
    165 	frame.sf_sig = native_to_linux_sig[sig];
    166 
    167 	/* Save register context. */
    168 #ifdef VM86
    169 	if (tf->tf_eflags & PSL_VM) {
    170 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
    171 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
    172 		frame.sf_sc.sc_es = tf->tf_vm86_es;
    173 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
    174 		frame.sf_sc.sc_eflags = get_vflags(l);
    175 	} else
    176 #endif
    177 	{
    178 		__asm("movl %%gs,%w0" : "=r" (frame.sf_sc.sc_gs));
    179 		__asm("movl %%fs,%w0" : "=r" (frame.sf_sc.sc_fs));
    180 		frame.sf_sc.sc_es = tf->tf_es;
    181 		frame.sf_sc.sc_ds = tf->tf_ds;
    182 		frame.sf_sc.sc_eflags = tf->tf_eflags;
    183 	}
    184 	frame.sf_sc.sc_edi = tf->tf_edi;
    185 	frame.sf_sc.sc_esi = tf->tf_esi;
    186 	frame.sf_sc.sc_ebp = tf->tf_ebp;
    187 	frame.sf_sc.sc_ebx = tf->tf_ebx;
    188 	frame.sf_sc.sc_edx = tf->tf_edx;
    189 	frame.sf_sc.sc_ecx = tf->tf_ecx;
    190 	frame.sf_sc.sc_eax = tf->tf_eax;
    191 	frame.sf_sc.sc_eip = tf->tf_eip;
    192 	frame.sf_sc.sc_cs = tf->tf_cs;
    193 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
    194 	frame.sf_sc.sc_ss = tf->tf_ss;
    195 	frame.sf_sc.sc_err = tf->tf_err;
    196 	frame.sf_sc.sc_trapno = tf->tf_trapno;
    197 
    198 	/* Save signal stack. */
    199 	/* XXX Linux doesn't support the signal stack. */
    200 
    201 	/* Save signal mask. */
    202 	native_to_linux_old_sigset(mask, &frame.sf_sc.sc_mask);
    203 
    204 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    205 		/*
    206 		 * Process has trashed its stack; give it an illegal
    207 		 * instruction to halt it in its tracks.
    208 		 */
    209 		sigexit(l, SIGILL);
    210 		/* NOTREACHED */
    211 	}
    212 
    213 	/*
    214 	 * Build context to run handler in.
    215 	 */
    216 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    217 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    218 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    219 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    220 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    221 	tf->tf_esp = (int)fp;
    222 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    223 
    224 	/* Remember that we're now on the signal stack. */
    225 	/* XXX Linux doesn't support the signal stack. */
    226 }
    227 
    228 /*
    229  * System call to cleanup state after a signal
    230  * has been taken.  Reset signal mask and
    231  * stack state from context left by sendsig (above).
    232  * Return to previous pc and psl as specified by
    233  * context left by sendsig. Check carefully to
    234  * make sure that the user has not modified the
    235  * psl to gain improper privileges or to cause
    236  * a machine fault.
    237  */
    238 int
    239 linux_sys_rt_sigreturn(l, v, retval)
    240 	struct lwp *l;
    241 	void *v;
    242 	register_t *retval;
    243 {
    244 	/* XXX XAX write me */
    245 	return(ENOSYS);
    246 }
    247 
    248 int
    249 linux_sys_sigreturn(l, v, retval)
    250 	struct lwp *l;
    251 	void *v;
    252 	register_t *retval;
    253 {
    254 	struct linux_sys_sigreturn_args /* {
    255 		syscallarg(struct linux_sigcontext *) scp;
    256 	} */ *uap = v;
    257 	struct proc *p = l->l_proc;
    258 	struct linux_sigcontext *scp, context;
    259 	struct trapframe *tf;
    260 	sigset_t mask;
    261 
    262 	/*
    263 	 * The trampoline code hands us the context.
    264 	 * It is unsafe to keep track of it ourselves, in the event that a
    265 	 * program jumps out of a signal handler.
    266 	 */
    267 	scp = SCARG(uap, scp);
    268 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
    269 		return (EFAULT);
    270 
    271 	/* Restore register context. */
    272 	tf = l->l_md.md_regs;
    273 #ifdef VM86
    274 	if (context.sc_eflags & PSL_VM) {
    275 		tf->tf_vm86_gs = context.sc_gs;
    276 		tf->tf_vm86_fs = context.sc_fs;
    277 		tf->tf_vm86_es = context.sc_es;
    278 		tf->tf_vm86_ds = context.sc_ds;
    279 		set_vflags(l, context.sc_eflags);
    280 	} else
    281 #endif
    282 	{
    283 		/*
    284 		 * Check for security violations.  If we're returning to
    285 		 * protected mode, the CPU will validate the segment registers
    286 		 * automatically and generate a trap on violations.  We handle
    287 		 * the trap, rather than doing all of the checking here.
    288 		 */
    289 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    290 		    !USERMODE(context.sc_cs, context.sc_eflags))
    291 			return (EINVAL);
    292 
    293 		/* %fs and %gs were restored by the trampoline. */
    294 		tf->tf_es = context.sc_es;
    295 		tf->tf_ds = context.sc_ds;
    296 		tf->tf_eflags = context.sc_eflags;
    297 	}
    298 	tf->tf_edi = context.sc_edi;
    299 	tf->tf_esi = context.sc_esi;
    300 	tf->tf_ebp = context.sc_ebp;
    301 	tf->tf_ebx = context.sc_ebx;
    302 	tf->tf_edx = context.sc_edx;
    303 	tf->tf_ecx = context.sc_ecx;
    304 	tf->tf_eax = context.sc_eax;
    305 	tf->tf_eip = context.sc_eip;
    306 	tf->tf_cs = context.sc_cs;
    307 	tf->tf_esp = context.sc_esp_at_signal;
    308 	tf->tf_ss = context.sc_ss;
    309 
    310 	/* Restore signal stack. */
    311 	p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
    312 
    313 	/* Restore signal mask. */
    314 	linux_old_to_native_sigset(&context.sc_mask, &mask);
    315 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    316 
    317 	return (EJUSTRETURN);
    318 }
    319 
    320 #ifdef USER_LDT
    321 
    322 int
    323 linux_read_ldt(l, uap, retval)
    324 	struct lwp *l;
    325 	struct linux_sys_modify_ldt_args /* {
    326 		syscallarg(int) func;
    327 		syscallarg(void *) ptr;
    328 		syscallarg(size_t) bytecount;
    329 	} */ *uap;
    330 	register_t *retval;
    331 {
    332 	struct proc *p = l->l_proc;
    333 	struct i386_get_ldt_args gl;
    334 	int error;
    335 	caddr_t sg;
    336 	char *parms;
    337 
    338 	sg = stackgap_init(p->p_emul);
    339 
    340 	gl.start = 0;
    341 	gl.desc = SCARG(uap, ptr);
    342 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    343 
    344 	parms = stackgap_alloc(&sg, sizeof(gl));
    345 
    346 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    347 		return (error);
    348 
    349 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    350 		return (error);
    351 
    352 	*retval *= sizeof(union descriptor);
    353 	return (0);
    354 }
    355 
    356 struct linux_ldt_info {
    357 	u_int entry_number;
    358 	u_long base_addr;
    359 	u_int limit;
    360 	u_int seg_32bit:1;
    361 	u_int contents:2;
    362 	u_int read_exec_only:1;
    363 	u_int limit_in_pages:1;
    364 	u_int seg_not_present:1;
    365 };
    366 
    367 int
    368 linux_write_ldt(l, uap, retval)
    369 	struct lwp *l;
    370 	struct linux_sys_modify_ldt_args /* {
    371 		syscallarg(int) func;
    372 		syscallarg(void *) ptr;
    373 		syscallarg(size_t) bytecount;
    374 	} */ *uap;
    375 	register_t *retval;
    376 {
    377 	struct proc *p = l->l_proc;
    378 	struct linux_ldt_info ldt_info;
    379 	struct segment_descriptor sd;
    380 	struct i386_set_ldt_args sl;
    381 	int error;
    382 	caddr_t sg;
    383 	char *parms;
    384 
    385 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    386 		return (EINVAL);
    387 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    388 		return error;
    389 	if (ldt_info.contents == 3)
    390 		return (EINVAL);
    391 
    392 	sg = stackgap_init(p->p_emul);
    393 
    394 	sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    395 	sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    396 	sd.sd_lolimit = ldt_info.limit & 0xffff;
    397 	sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    398 	sd.sd_type =
    399 	    16 | (ldt_info.contents << 2) | (!ldt_info.read_exec_only << 1);
    400 	sd.sd_dpl = SEL_UPL;
    401 	sd.sd_p = !ldt_info.seg_not_present;
    402 	sd.sd_def32 = ldt_info.seg_32bit;
    403 	sd.sd_gran = ldt_info.limit_in_pages;
    404 
    405 	sl.start = ldt_info.entry_number;
    406 	sl.desc = stackgap_alloc(&sg, sizeof(sd));
    407 	sl.num = 1;
    408 
    409 #if 0
    410 	printf("linux_write_ldt: idx=%d, base=%x, limit=%x\n",
    411 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit);
    412 #endif
    413 
    414 	parms = stackgap_alloc(&sg, sizeof(sl));
    415 
    416 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    417 		return (error);
    418 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    419 		return (error);
    420 
    421 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    422 		return (error);
    423 
    424 	*retval = 0;
    425 	return (0);
    426 }
    427 
    428 #endif /* USER_LDT */
    429 
    430 int
    431 linux_sys_modify_ldt(l, v, retval)
    432 	struct lwp *l;
    433 	void *v;
    434 	register_t *retval;
    435 {
    436 	struct linux_sys_modify_ldt_args /* {
    437 		syscallarg(int) func;
    438 		syscallarg(void *) ptr;
    439 		syscallarg(size_t) bytecount;
    440 	} */ *uap = v;
    441 
    442 	switch (SCARG(uap, func)) {
    443 #ifdef USER_LDT
    444 	case 0:
    445 		return (linux_read_ldt(l, uap, retval));
    446 
    447 	case 1:
    448 		return (linux_write_ldt(l, uap, retval));
    449 #endif /* USER_LDT */
    450 
    451 	default:
    452 		return (ENOSYS);
    453 	}
    454 }
    455 
/*
 * XXX Hack to keep svgalib happy: report an opened wscons VT under the
 * Linux console major, with the minor number shifted up by one.  All
 * other device numbers are returned unchanged, as is everything when
 * wsdisplay is not configured.
 *
 * The proper fix would be a mapping table covering every major device
 * number, applied to linux_mknod as well.
 */
dev_t
linux_fakedev(dev)
	dev_t dev;
{
#if (NWSDISPLAY > 0)
	const int on_wscons = (major(dev) == NETBSD_WSCONS_MAJOR);

	if (on_wscons)
		return makedev(LINUX_CONS_MAJOR, minor(dev) + 1);
#endif

	return dev;
}
    472 
    473 #if (NWSDISPLAY > 0)
    474 /*
    475  * That's not complete, but enough to get an X server running.
    476  */
    477 #define NR_KEYS 128
    478 static const u_short plain_map[NR_KEYS] = {
    479 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
    480 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
    481 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    482 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
    483 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
    484 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
    485 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
    486 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    487 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
    488 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    489 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
    490 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    491 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    492 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    493 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    494 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    495 }, shift_map[NR_KEYS] = {
    496 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
    497 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
    498 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
    499 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
    500 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
    501 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
    502 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
    503 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
    504 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
    505 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    506 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
    507 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    508 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    509 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
    510 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    511 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    512 }, altgr_map[NR_KEYS] = {
    513 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
    514 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
    515 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    516 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
    517 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
    518 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
    519 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
    520 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
    521 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
    522 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
    523 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
    524 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    525 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    526 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    527 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    528 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    529 }, ctrl_map[NR_KEYS] = {
    530 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
    531 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
    532 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
    533 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
    534 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
    535 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
    536 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
    537 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    538 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
    539 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    540 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
    541 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    542 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    543 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    544 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    545 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    546 };
    547 
    548 const u_short * const linux_keytabs[] = {
    549 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
    550 };
    551 #endif
    552 
    553 static struct biosdisk_info *
    554 fd2biosinfo(p, fp)
    555 	struct proc *p;
    556 	struct file *fp;
    557 {
    558 	struct vnode *vp;
    559 	const char *blkname;
    560 	char diskname[16];
    561 	int i;
    562 	struct nativedisk_info *nip;
    563 	struct disklist *dl = i386_alldisks;
    564 
    565 	if (fp->f_type != DTYPE_VNODE)
    566 		return NULL;
    567 	vp = (struct vnode *)fp->f_data;
    568 
    569 	if (vp->v_type != VBLK)
    570 		return NULL;
    571 
    572 	blkname = findblkname(major(vp->v_rdev));
    573 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    574 	    DISKUNIT(vp->v_rdev));
    575 
    576 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    577 		nip = &dl->dl_nativedisks[i];
    578 		if (strcmp(diskname, nip->ni_devname))
    579 			continue;
    580 		if (nip->ni_nmatches != 0)
    581 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    582 	}
    583 
    584 	return NULL;
    585 }
    586 
    587 
    588 /*
    589  * We come here in a last attempt to satisfy a Linux ioctl() call
    590  */
    591 int
    592 linux_machdepioctl(p, v, retval)
    593 	struct proc *p;
    594 	void *v;
    595 	register_t *retval;
    596 {
    597 	struct linux_sys_ioctl_args /* {
    598 		syscallarg(int) fd;
    599 		syscallarg(u_long) com;
    600 		syscallarg(caddr_t) data;
    601 	} */ *uap = v;
    602 	struct sys_ioctl_args bia;
    603 	u_long com;
    604 	int error, error1;
    605 #if (NWSDISPLAY > 0)
    606 	struct vt_mode lvt;
    607 	caddr_t bvtp, sg;
    608 	struct kbentry kbe;
    609 #endif
    610 	struct linux_hd_geometry hdg;
    611 	struct linux_hd_big_geometry hdg_big;
    612 	struct biosdisk_info *bip;
    613 	struct filedesc *fdp;
    614 	struct file *fp;
    615 	int fd;
    616 	struct disklabel label, *labp;
    617 	struct partinfo partp;
    618 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
    619 	u_long start, biostotal, realtotal;
    620 	u_char heads, sectors;
    621 	u_int cylinders;
    622 	struct ioctl_pt pt;
    623 
    624 	fd = SCARG(uap, fd);
    625 	SCARG(&bia, fd) = fd;
    626 	SCARG(&bia, data) = SCARG(uap, data);
    627 	com = SCARG(uap, com);
    628 
    629 	fdp = p->p_fd;
    630 
    631 	if ((u_int)fd >= fdp->fd_nfiles ||
    632 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
    633 	    (fp->f_iflags & FIF_WANTCLOSE) != 0)
    634 		return (EBADF);
    635 
    636 	switch (com) {
    637 #if (NWSDISPLAY > 0)
    638 	case LINUX_KDGKBMODE:
    639 		com = KDGKBMODE;
    640 		break;
    641 	case LINUX_KDSKBMODE:
    642 		com = KDSKBMODE;
    643 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    644 			SCARG(&bia, data) = (caddr_t)K_RAW;
    645 		break;
    646 	case LINUX_KIOCSOUND:
    647 		SCARG(&bia, data) =
    648 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    649 		/* fall through */
    650 	case LINUX_KDMKTONE:
    651 		com = KDMKTONE;
    652 		break;
    653 	case LINUX_KDSETMODE:
    654 		com = KDSETMODE;
    655 		break;
    656 	case LINUX_KDGETMODE:
    657 		/* KD_* values are equal to the wscons numbers */
    658 		com = WSDISPLAYIO_GMODE;
    659 		break;
    660 	case LINUX_KDENABIO:
    661 		com = KDENABIO;
    662 		break;
    663 	case LINUX_KDDISABIO:
    664 		com = KDDISABIO;
    665 		break;
    666 	case LINUX_KDGETLED:
    667 		com = KDGETLED;
    668 		break;
    669 	case LINUX_KDSETLED:
    670 		com = KDSETLED;
    671 		break;
    672 	case LINUX_VT_OPENQRY:
    673 		com = VT_OPENQRY;
    674 		break;
    675 	case LINUX_VT_GETMODE:
    676 		SCARG(&bia, com) = VT_GETMODE;
    677 		/* XXX NJWLWP */
    678 		if ((error = sys_ioctl(curproc, &bia, retval)))
    679 			return error;
    680 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    681 		    sizeof (struct vt_mode))))
    682 			return error;
    683 		lvt.relsig = native_to_linux_sig[lvt.relsig];
    684 		lvt.acqsig = native_to_linux_sig[lvt.acqsig];
    685 		lvt.frsig = native_to_linux_sig[lvt.frsig];
    686 		return copyout((caddr_t)&lvt, SCARG(uap, data),
    687 		    sizeof (struct vt_mode));
    688 	case LINUX_VT_SETMODE:
    689 		com = VT_SETMODE;
    690 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    691 		    sizeof (struct vt_mode))))
    692 			return error;
    693 		lvt.relsig = linux_to_native_sig[lvt.relsig];
    694 		lvt.acqsig = linux_to_native_sig[lvt.acqsig];
    695 		lvt.frsig = linux_to_native_sig[lvt.frsig];
    696 		sg = stackgap_init(p->p_emul);
    697 		bvtp = stackgap_alloc(&sg, sizeof (struct vt_mode));
    698 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    699 			return error;
    700 		SCARG(&bia, data) = bvtp;
    701 		break;
    702 	case LINUX_VT_DISALLOCATE:
    703 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    704 		return 0;
    705 	case LINUX_VT_RELDISP:
    706 		com = VT_RELDISP;
    707 		break;
    708 	case LINUX_VT_ACTIVATE:
    709 		com = VT_ACTIVATE;
    710 		break;
    711 	case LINUX_VT_WAITACTIVE:
    712 		com = VT_WAITACTIVE;
    713 		break;
    714 	case LINUX_VT_GETSTATE:
    715 		com = VT_GETSTATE;
    716 		break;
    717 	case LINUX_KDGKBTYPE:
    718 		/* This is what Linux does. */
    719 		return (subyte(SCARG(uap, data), KB_101));
    720 	case LINUX_KDGKBENT:
    721 		/*
    722 		 * The Linux KDGKBENT ioctl is different from the
    723 		 * SYSV original. So we handle it in machdep code.
    724 		 * XXX We should use keyboard mapping information
    725 		 * from wsdisplay, but this would be expensive.
    726 		 */
    727 		if ((error = copyin(SCARG(uap, data), &kbe,
    728 				    sizeof(struct kbentry))))
    729 			return (error);
    730 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    731 		    || kbe.kb_index >= NR_KEYS)
    732 			return (EINVAL);
    733 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    734 		return (copyout(&kbe, SCARG(uap, data),
    735 				sizeof(struct kbentry)));
    736 #endif
    737 	case LINUX_HDIO_GETGEO:
    738 	case LINUX_HDIO_GETGEO_BIG:
    739 		/*
    740 		 * Try to mimic Linux behaviour: return the BIOS geometry
    741 		 * if possible (extending its # of cylinders if it's beyond
    742 		 * the 1023 limit), fall back to the MI geometry (i.e.
    743 		 * the real geometry) if not found, by returning an
    744 		 * error. See common/linux_hdio.c
    745 		 */
    746 		FILE_USE(fp);
    747 		bip = fd2biosinfo(p, fp);
    748 		ioctlf = fp->f_ops->fo_ioctl;
    749 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
    750 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
    751 		FILE_UNUSE(fp, p);
    752 		if (error != 0 && error1 != 0)
    753 			return error1;
    754 		labp = error != 0 ? &label : partp.disklab;
    755 		start = error1 != 0 ? partp.part->p_offset : 0;
    756 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    757 		    && bip->bi_cyl != 0) {
    758 			heads = bip->bi_head;
    759 			sectors = bip->bi_sec;
    760 			cylinders = bip->bi_cyl;
    761 			biostotal = heads * sectors * cylinders;
    762 			realtotal = labp->d_ntracks * labp->d_nsectors *
    763 			    labp->d_ncylinders;
    764 			if (realtotal > biostotal)
    765 				cylinders = realtotal / (heads * sectors);
    766 		} else {
    767 			heads = labp->d_ntracks;
    768 			cylinders = labp->d_ncylinders;
    769 			sectors = labp->d_nsectors;
    770 		}
    771 		if (com == LINUX_HDIO_GETGEO) {
    772 			hdg.start = start;
    773 			hdg.heads = heads;
    774 			hdg.cylinders = cylinders;
    775 			hdg.sectors = sectors;
    776 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
    777 		} else {
    778 			hdg_big.start = start;
    779 			hdg_big.heads = heads;
    780 			hdg_big.cylinders = cylinders;
    781 			hdg_big.sectors = sectors;
    782 			return copyout(&hdg_big, SCARG(uap, data),
    783 			    sizeof hdg_big);
    784 		}
    785 		return 0;
    786 
    787 	default:
    788 		/*
    789 		 * Unknown to us. If it's on a device, just pass it through
    790 		 * using PTIOCLINUX, the device itself might be able to
    791 		 * make some sense of it.
    792 		 * XXX hack: if the function returns EJUSTRETURN,
    793 		 * it has stuffed a sysctl return value in pt.data.
    794 		 */
    795 		FILE_USE(fp);
    796 		ioctlf = fp->f_ops->fo_ioctl;
    797 		pt.com = SCARG(uap, com);
    798 		pt.data = SCARG(uap, data);
    799 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
    800 		FILE_UNUSE(fp, p);
    801 		if (error == EJUSTRETURN) {
    802 			retval[0] = (register_t)pt.data;
    803 			error = 0;
    804 		}
    805 
    806 		if (error == ENOTTY)
    807 			printf("linux_machdepioctl: invalid ioctl %08lx\n",
    808 			    com);
    809 		return error;
    810 	}
    811 	SCARG(&bia, com) = com;
    812 	/* XXX NJWLWP */
    813 	return sys_ioctl(curproc, &bia, retval);
    814 }
    815 
    816 /*
    817  * Set I/O permissions for a process. Just set the maximum level
    818  * right away (ignoring the argument), otherwise we would have
    819  * to rely on I/O permission maps, which are not implemented.
    820  */
    821 int
    822 linux_sys_iopl(l, v, retval)
    823 	struct lwp *l;
    824 	void *v;
    825 	register_t *retval;
    826 {
    827 #if 0
    828 	struct linux_sys_iopl_args /* {
    829 		syscallarg(int) level;
    830 	} */ *uap = v;
    831 #endif
    832 	struct proc *p = l->l_proc;
    833 	struct trapframe *fp = l->l_md.md_regs;
    834 
    835 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    836 		return EPERM;
    837 	fp->tf_eflags |= PSL_IOPL;
    838 	*retval = 0;
    839 	return 0;
    840 }
    841 
    842 /*
    843  * See above. If a root process tries to set access to an I/O port,
    844  * just let it have the whole range.
    845  */
    846 int
    847 linux_sys_ioperm(l, v, retval)
    848 	struct lwp *l;
    849 	void *v;
    850 	register_t *retval;
    851 {
    852 	struct linux_sys_ioperm_args /* {
    853 		syscallarg(unsigned int) lo;
    854 		syscallarg(unsigned int) hi;
    855 		syscallarg(int) val;
    856 	} */ *uap = v;
    857 	struct proc *p = l->l_proc;
    858 	struct trapframe *fp = l->l_md.md_regs;
    859 
    860 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    861 		return EPERM;
    862 	if (SCARG(uap, val))
    863 		fp->tf_eflags |= PSL_IOPL;
    864 	*retval = 0;
    865 	return 0;
    866 }
    867