Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.62.2.2
      1 /*	$NetBSD: linux_machdep.c,v 1.62.2.2 2001/06/21 19:59:30 nathanw Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #if defined(_KERNEL_OPT)
     40 #include "opt_vm86.h"
     41 #include "opt_user_ldt.h"
     42 #endif
     43 
     44 #include <sys/param.h>
     45 #include <sys/systm.h>
     46 #include <sys/signalvar.h>
     47 #include <sys/kernel.h>
     48 #include <sys/map.h>
     49 #include <sys/lwp.h>
     50 #include <sys/proc.h>
     51 #include <sys/user.h>
     52 #include <sys/buf.h>
     53 #include <sys/reboot.h>
     54 #include <sys/conf.h>
     55 #include <sys/exec.h>
     56 #include <sys/file.h>
     57 #include <sys/callout.h>
     58 #include <sys/malloc.h>
     59 #include <sys/mbuf.h>
     60 #include <sys/msgbuf.h>
     61 #include <sys/mount.h>
     62 #include <sys/vnode.h>
     63 #include <sys/device.h>
     64 #include <sys/syscallargs.h>
     65 #include <sys/filedesc.h>
     66 #include <sys/exec_elf.h>
     67 #include <sys/disklabel.h>
     68 #include <sys/ioctl.h>
     69 #include <miscfs/specfs/specdev.h>
     70 
     71 #include <compat/linux/common/linux_types.h>
     72 #include <compat/linux/common/linux_signal.h>
     73 #include <compat/linux/common/linux_util.h>
     74 #include <compat/linux/common/linux_ioctl.h>
     75 #include <compat/linux/common/linux_hdio.h>
     76 #include <compat/linux/common/linux_exec.h>
     77 #include <compat/linux/common/linux_machdep.h>
     78 
     79 #include <compat/linux/linux_syscallargs.h>
     80 
     81 #include <machine/cpu.h>
     82 #include <machine/cpufunc.h>
     83 #include <machine/psl.h>
     84 #include <machine/reg.h>
     85 #include <machine/segments.h>
     86 #include <machine/specialreg.h>
     87 #include <machine/sysarch.h>
     88 #include <machine/vm86.h>
     89 #include <machine/vmparam.h>
     90 
     91 /*
     92  * To see whether wscons is configured (for virtual console ioctl calls).
     93  */
     94 #if defined(_KERNEL_OPT)
     95 #include "wsdisplay.h"
     96 #endif
     97 #if (NWSDISPLAY > 0)
     98 #include <dev/wscons/wsconsio.h>
     99 #include <dev/wscons/wsdisplay_usl_io.h>
    100 #if defined(_KERNEL_OPT)
    101 #include "opt_xserver.h"
    102 #endif
    103 #endif
    104 
    105 #ifdef USER_LDT
    106 #include <machine/cpu.h>
    107 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    108     register_t *));
    109 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    110     register_t *));
    111 #endif
    112 
    113 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    114 extern struct disklist *i386_alldisks;
    115 extern const char *findblkname __P((int));
    116 
    117 /*
    118  * Deal with some i386-specific things in the Linux emulation code.
    119  */
    120 
    121 void
    122 linux_setregs(l, epp, stack)
    123 	struct lwp *l;
    124 	struct exec_package *epp;
    125 	u_long stack;
    126 {
    127 	struct pcb *pcb = &l->l_addr->u_pcb;
    128 
    129 	setregs(l, epp, stack);
    130 	pcb->pcb_savefpu.sv_env.en_cw = __Linux_NPXCW__;
    131 }
    132 
    133 /*
    134  * Send an interrupt to process.
    135  *
    136  * Stack is set up to allow sigcode stored
    137  * in u. to call routine, followed by kcall
    138  * to sigreturn routine below.  After sigreturn
    139  * resets the signal mask, the stack, and the
    140  * frame pointer, it returns to the user
    141  * specified pc, psl.
    142  */
    143 
    144 void
    145 linux_sendsig(catcher, sig, mask, code)
    146 	sig_t catcher;
    147 	int sig;
    148 	sigset_t *mask;
    149 	u_long code;
    150 {
    151 	struct lwp *l = curproc;
    152 	struct proc *p = l->l_proc;
    153 	struct trapframe *tf;
    154 	struct linux_sigframe *fp, frame;
    155 
    156 	tf = l->l_md.md_regs;
    157 
    158 	/* Allocate space for the signal handler context. */
    159 	/* XXX Linux doesn't support the signal stack. */
    160 	fp = (struct linux_sigframe *)tf->tf_esp;
    161 	fp--;
    162 
    163 	/* Build stack frame for signal trampoline. */
    164 	frame.sf_handler = catcher;
    165 	frame.sf_sig = native_to_linux_sig[sig];
    166 
    167 	/* Save register context. */
    168 #ifdef VM86
    169 	if (tf->tf_eflags & PSL_VM) {
    170 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
    171 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
    172 		frame.sf_sc.sc_es = tf->tf_vm86_es;
    173 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
    174 		frame.sf_sc.sc_eflags = get_vflags(l);
    175 	} else
    176 #endif
    177 	{
    178 		frame.sf_sc.sc_gs = tf->tf_gs;
    179 		frame.sf_sc.sc_fs = tf->tf_fs;
    180 		frame.sf_sc.sc_es = tf->tf_es;
    181 		frame.sf_sc.sc_ds = tf->tf_ds;
    182 		frame.sf_sc.sc_eflags = tf->tf_eflags;
    183 	}
    184 	frame.sf_sc.sc_edi = tf->tf_edi;
    185 	frame.sf_sc.sc_esi = tf->tf_esi;
    186 	frame.sf_sc.sc_ebp = tf->tf_ebp;
    187 	frame.sf_sc.sc_ebx = tf->tf_ebx;
    188 	frame.sf_sc.sc_edx = tf->tf_edx;
    189 	frame.sf_sc.sc_ecx = tf->tf_ecx;
    190 	frame.sf_sc.sc_eax = tf->tf_eax;
    191 	frame.sf_sc.sc_eip = tf->tf_eip;
    192 	frame.sf_sc.sc_cs = tf->tf_cs;
    193 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
    194 	frame.sf_sc.sc_ss = tf->tf_ss;
    195 	frame.sf_sc.sc_err = tf->tf_err;
    196 	frame.sf_sc.sc_trapno = tf->tf_trapno;
    197 
    198 	/* Save signal stack. */
    199 	/* XXX Linux doesn't support the signal stack. */
    200 
    201 	/* Save signal mask. */
    202 	native_to_linux_old_sigset(mask, &frame.sf_sc.sc_mask);
    203 
    204 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    205 		/*
    206 		 * Process has trashed its stack; give it an illegal
    207 		 * instruction to halt it in its tracks.
    208 		 */
    209 		sigexit(l, SIGILL);
    210 		/* NOTREACHED */
    211 	}
    212 
    213 	/*
    214 	 * Build context to run handler in.
    215 	 */
    216 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    217 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    218 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    219 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    220 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    221 	tf->tf_esp = (int)fp;
    222 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    223 
    224 	/* Remember that we're now on the signal stack. */
    225 	/* XXX Linux doesn't support the signal stack. */
    226 }
    227 
    228 /*
    229  * System call to cleanup state after a signal
    230  * has been taken.  Reset signal mask and
    231  * stack state from context left by sendsig (above).
    232  * Return to previous pc and psl as specified by
    233  * context left by sendsig. Check carefully to
    234  * make sure that the user has not modified the
    235  * psl to gain improper privileges or to cause
    236  * a machine fault.
    237  */
/*
 * Placeholder for the Linux rt_sigreturn system call.
 * Not implemented: every call fails with ENOSYS and none of the
 * arguments are examined.
 */
int
linux_sys_rt_sigreturn(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	/* XXX XAX write me */
	const int unimplemented = ENOSYS;

	return (unimplemented);
}
    247 
    248 int
    249 linux_sys_sigreturn(l, v, retval)
    250 	struct lwp *l;
    251 	void *v;
    252 	register_t *retval;
    253 {
    254 	struct linux_sys_sigreturn_args /* {
    255 		syscallarg(struct linux_sigcontext *) scp;
    256 	} */ *uap = v;
    257 	struct proc *p = l->l_proc;
    258 	struct linux_sigcontext *scp, context;
    259 	struct trapframe *tf;
    260 	sigset_t mask;
    261 
    262 	/*
    263 	 * The trampoline code hands us the context.
    264 	 * It is unsafe to keep track of it ourselves, in the event that a
    265 	 * program jumps out of a signal handler.
    266 	 */
    267 	scp = SCARG(uap, scp);
    268 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
    269 		return (EFAULT);
    270 
    271 	/* Restore register context. */
    272 	tf = l->l_md.md_regs;
    273 #ifdef VM86
    274 	if (context.sc_eflags & PSL_VM) {
    275 		tf->tf_vm86_gs = context.sc_gs;
    276 		tf->tf_vm86_fs = context.sc_fs;
    277 		tf->tf_vm86_es = context.sc_es;
    278 		tf->tf_vm86_ds = context.sc_ds;
    279 		set_vflags(l, context.sc_eflags);
    280 	} else
    281 #endif
    282 	{
    283 		/*
    284 		 * Check for security violations.  If we're returning to
    285 		 * protected mode, the CPU will validate the segment registers
    286 		 * automatically and generate a trap on violations.  We handle
    287 		 * the trap, rather than doing all of the checking here.
    288 		 */
    289 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    290 		    !USERMODE(context.sc_cs, context.sc_eflags))
    291 			return (EINVAL);
    292 
    293 		/* %fs and %gs were restored by the trampoline. */
    294 		tf->tf_es = context.sc_es;
    295 		tf->tf_ds = context.sc_ds;
    296 		tf->tf_eflags = context.sc_eflags;
    297 	}
    298 	tf->tf_edi = context.sc_edi;
    299 	tf->tf_esi = context.sc_esi;
    300 	tf->tf_ebp = context.sc_ebp;
    301 	tf->tf_ebx = context.sc_ebx;
    302 	tf->tf_edx = context.sc_edx;
    303 	tf->tf_ecx = context.sc_ecx;
    304 	tf->tf_eax = context.sc_eax;
    305 	tf->tf_eip = context.sc_eip;
    306 	tf->tf_cs = context.sc_cs;
    307 	tf->tf_esp = context.sc_esp_at_signal;
    308 	tf->tf_ss = context.sc_ss;
    309 
    310 	/* Restore signal stack. */
    311 	p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
    312 
    313 	/* Restore signal mask. */
    314 	linux_old_to_native_sigset(&context.sc_mask, &mask);
    315 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    316 
    317 	return (EJUSTRETURN);
    318 }
    319 
    320 #ifdef USER_LDT
    321 
    322 int
    323 linux_read_ldt(l, uap, retval)
    324 	struct lwp *l;
    325 	struct linux_sys_modify_ldt_args /* {
    326 		syscallarg(int) func;
    327 		syscallarg(void *) ptr;
    328 		syscallarg(size_t) bytecount;
    329 	} */ *uap;
    330 	register_t *retval;
    331 {
    332 	struct proc *p = l->l_proc;
    333 	struct i386_get_ldt_args gl;
    334 	int error;
    335 	caddr_t sg;
    336 	char *parms;
    337 
    338 	sg = stackgap_init(p->p_emul);
    339 
    340 	gl.start = 0;
    341 	gl.desc = SCARG(uap, ptr);
    342 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    343 
    344 	parms = stackgap_alloc(&sg, sizeof(gl));
    345 
    346 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    347 		return (error);
    348 
    349 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    350 		return (error);
    351 
    352 	*retval *= sizeof(union descriptor);
    353 	return (0);
    354 }
    355 
    356 struct linux_ldt_info {
    357 	u_int entry_number;
    358 	u_long base_addr;
    359 	u_int limit;
    360 	u_int seg_32bit:1;
    361 	u_int contents:2;
    362 	u_int read_exec_only:1;
    363 	u_int limit_in_pages:1;
    364 	u_int seg_not_present:1;
    365 };
    366 
    367 int
    368 linux_write_ldt(l, uap, retval)
    369 	struct lwp *l;
    370 	struct linux_sys_modify_ldt_args /* {
    371 		syscallarg(int) func;
    372 		syscallarg(void *) ptr;
    373 		syscallarg(size_t) bytecount;
    374 	} */ *uap;
    375 	register_t *retval;
    376 {
    377 	struct proc *p = l->l_proc;
    378 	struct linux_ldt_info ldt_info;
    379 	struct segment_descriptor sd;
    380 	struct i386_set_ldt_args sl;
    381 	int error;
    382 	caddr_t sg;
    383 	char *parms;
    384 
    385 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    386 		return (EINVAL);
    387 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    388 		return error;
    389 	if (ldt_info.contents == 3)
    390 		return (EINVAL);
    391 
    392 	sg = stackgap_init(p->p_emul);
    393 
    394 	sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    395 	sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    396 	sd.sd_lolimit = ldt_info.limit & 0xffff;
    397 	sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    398 	sd.sd_type =
    399 	    16 | (ldt_info.contents << 2) | (!ldt_info.read_exec_only << 1);
    400 	sd.sd_dpl = SEL_UPL;
    401 	sd.sd_p = !ldt_info.seg_not_present;
    402 	sd.sd_def32 = ldt_info.seg_32bit;
    403 	sd.sd_gran = ldt_info.limit_in_pages;
    404 
    405 	sl.start = ldt_info.entry_number;
    406 	sl.desc = stackgap_alloc(&sg, sizeof(sd));
    407 	sl.num = 1;
    408 
    409 #if 0
    410 	printf("linux_write_ldt: idx=%d, base=%x, limit=%x\n",
    411 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit);
    412 #endif
    413 
    414 	parms = stackgap_alloc(&sg, sizeof(sl));
    415 
    416 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    417 		return (error);
    418 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    419 		return (error);
    420 
    421 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    422 		return (error);
    423 
    424 	*retval = 0;
    425 	return (0);
    426 }
    427 
    428 #endif /* USER_LDT */
    429 
    430 int
    431 linux_sys_modify_ldt(l, v, retval)
    432 	struct lwp *l;
    433 	void *v;
    434 	register_t *retval;
    435 {
    436 	struct linux_sys_modify_ldt_args /* {
    437 		syscallarg(int) func;
    438 		syscallarg(void *) ptr;
    439 		syscallarg(size_t) bytecount;
    440 	} */ *uap = v;
    441 
    442 	switch (SCARG(uap, func)) {
    443 #ifdef USER_LDT
    444 	case 0:
    445 		return (linux_read_ldt(l, uap, retval));
    446 
    447 	case 1:
    448 		return (linux_write_ldt(l, uap, retval));
    449 #endif /* USER_LDT */
    450 
    451 	default:
    452 		return (ENOSYS);
    453 	}
    454 }
    455 
    456 /*
    457  * XXX Pathetic hack to make svgalib work. This will fake the major
    458  * device number of an opened VT so that svgalib likes it. grmbl.
    459  * Should probably do it 'wrong the right way' and use a mapping
    460  * array for all major device numbers, and map linux_mknod too.
    461  */
/*
 * Translate a native device number into the one shown to Linux programs.
 *
 * When wscons is configured, a device whose major is NETBSD_WSCONS_MAJOR
 * is reported with major LINUX_CONS_MAJOR and its minor shifted up by
 * one.  Every other device number is passed through unchanged.
 */
dev_t
linux_fakedev(dev)
	dev_t dev;
{
	dev_t shown = dev;

#if (NWSDISPLAY > 0)
	if (major(dev) == NETBSD_WSCONS_MAJOR)
		shown = makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
#endif
	return shown;
}
    472 
#if (NWSDISPLAY > 0)
/*
 * Fixed keyboard translation tables returned by the LINUX_KDGKBENT
 * ioctl.  That's not complete, but enough to get an X server running.
 *
 * Each table has NR_KEYS entries; the comment at the start of a row is
 * the index of its first entry.
 */
#define NR_KEYS 128

static const u_short plain_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036,
	/* 0x08 */ 0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009,
	/* 0x10 */ 0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
	/* 0x18 */ 0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73,
	/* 0x20 */ 0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b,
	/* 0x28 */ 0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76,
	/* 0x30 */ 0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
	/* 0x40 */ 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307,
	/* 0x48 */ 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	/* 0x50 */ 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a,
	/* 0x58 */ 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

static const u_short shift_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e,
	/* 0x08 */ 0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009,
	/* 0x10 */ 0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49,
	/* 0x18 */ 0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53,
	/* 0x20 */ 0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a,
	/* 0x28 */ 0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56,
	/* 0x30 */ 0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e,
	/* 0x40 */ 0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307,
	/* 0x48 */ 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	/* 0x50 */ 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a,
	/* 0x58 */ 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

static const u_short altgr_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200,
	/* 0x08 */ 0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200,
	/* 0x10 */ 0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
	/* 0x18 */ 0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73,
	/* 0x20 */ 0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200,
	/* 0x28 */ 0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76,
	/* 0x30 */ 0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510,
	/* 0x40 */ 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911,
	/* 0x48 */ 0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b,
	/* 0x50 */ 0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516,
	/* 0x58 */ 0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

static const u_short ctrl_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e,
	/* 0x08 */ 0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200,
	/* 0x10 */ 0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009,
	/* 0x18 */ 0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013,
	/* 0x20 */ 0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200,
	/* 0x28 */ 0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016,
	/* 0x30 */ 0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
	/* 0x40 */ 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307,
	/* 0x48 */ 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	/* 0x50 */ 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a,
	/* 0x58 */ 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/*
 * Table selector, indexed by the kb_table value of a KDGKBENT request.
 * Slots 2 and 3 both refer to altgr_map.
 */
const u_short * const linux_keytabs[] = {
	plain_map,	/* 0 */
	shift_map,	/* 1 */
	altgr_map,	/* 2 */
	altgr_map,	/* 3 */
	ctrl_map	/* 4 */
};
#endif
    552 
    553 static struct biosdisk_info *
    554 fd2biosinfo(p, fp)
    555 	struct proc *p;
    556 	struct file *fp;
    557 {
    558 	struct vnode *vp;
    559 	const char *blkname;
    560 	char diskname[16];
    561 	int i;
    562 	struct nativedisk_info *nip;
    563 	struct disklist *dl = i386_alldisks;
    564 
    565 	if (fp->f_type != DTYPE_VNODE)
    566 		return NULL;
    567 	vp = (struct vnode *)fp->f_data;
    568 
    569 	if (vp->v_type != VBLK)
    570 		return NULL;
    571 
    572 	blkname = findblkname(major(vp->v_rdev));
    573 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    574 	    DISKUNIT(vp->v_rdev));
    575 
    576 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    577 		nip = &dl->dl_nativedisks[i];
    578 		if (strcmp(diskname, nip->ni_devname))
    579 			continue;
    580 		if (nip->ni_nmatches != 0)
    581 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    582 	}
    583 
    584 	return NULL;
    585 }
    586 
    587 
    588 /*
    589  * We come here in a last attempt to satisfy a Linux ioctl() call
    590  */
    591 int
    592 linux_machdepioctl(p, v, retval)
    593 	struct proc *p;
    594 	void *v;
    595 	register_t *retval;
    596 {
    597 	struct linux_sys_ioctl_args /* {
    598 		syscallarg(int) fd;
    599 		syscallarg(u_long) com;
    600 		syscallarg(caddr_t) data;
    601 	} */ *uap = v;
    602 	struct sys_ioctl_args bia;
    603 	u_long com;
    604 	int error, error1;
    605 #if (NWSDISPLAY > 0)
    606 	struct vt_mode lvt;
    607 	caddr_t bvtp, sg;
    608 	struct kbentry kbe;
    609 #endif
    610 	struct linux_hd_geometry hdg;
    611 	struct linux_hd_big_geometry hdg_big;
    612 	struct biosdisk_info *bip;
    613 	struct filedesc *fdp;
    614 	struct file *fp;
    615 	int fd;
    616 	struct disklabel label, *labp;
    617 	struct partinfo partp;
    618 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
    619 	u_long start, biostotal, realtotal;
    620 	u_char heads, sectors;
    621 	u_int cylinders;
    622 	struct ioctl_pt pt;
    623 
    624 	fd = SCARG(uap, fd);
    625 	SCARG(&bia, fd) = fd;
    626 	SCARG(&bia, data) = SCARG(uap, data);
    627 	com = SCARG(uap, com);
    628 
    629 	fdp = p->p_fd;
    630 
    631 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    632 		return (EBADF);
    633 
    634 	switch (com) {
    635 #if (NWSDISPLAY > 0)
    636 	case LINUX_KDGKBMODE:
    637 		com = KDGKBMODE;
    638 		break;
    639 	case LINUX_KDSKBMODE:
    640 		com = KDSKBMODE;
    641 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    642 			SCARG(&bia, data) = (caddr_t)K_RAW;
    643 		break;
    644 	case LINUX_KIOCSOUND:
    645 		SCARG(&bia, data) =
    646 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    647 		/* fall through */
    648 	case LINUX_KDMKTONE:
    649 		com = KDMKTONE;
    650 		break;
    651 	case LINUX_KDSETMODE:
    652 		com = KDSETMODE;
    653 		break;
    654 	case LINUX_KDGETMODE:
    655 		/* KD_* values are equal to the wscons numbers */
    656 		com = WSDISPLAYIO_GMODE;
    657 		break;
    658 	case LINUX_KDENABIO:
    659 		com = KDENABIO;
    660 		break;
    661 	case LINUX_KDDISABIO:
    662 		com = KDDISABIO;
    663 		break;
    664 	case LINUX_KDGETLED:
    665 		com = KDGETLED;
    666 		break;
    667 	case LINUX_KDSETLED:
    668 		com = KDSETLED;
    669 		break;
    670 	case LINUX_VT_OPENQRY:
    671 		com = VT_OPENQRY;
    672 		break;
    673 	case LINUX_VT_GETMODE:
    674 		SCARG(&bia, com) = VT_GETMODE;
    675 		/* XXX NJWLWP */
    676 		if ((error = sys_ioctl(curproc, &bia, retval)))
    677 			return error;
    678 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    679 		    sizeof (struct vt_mode))))
    680 			return error;
    681 		lvt.relsig = native_to_linux_sig[lvt.relsig];
    682 		lvt.acqsig = native_to_linux_sig[lvt.acqsig];
    683 		lvt.frsig = native_to_linux_sig[lvt.frsig];
    684 		return copyout((caddr_t)&lvt, SCARG(uap, data),
    685 		    sizeof (struct vt_mode));
    686 	case LINUX_VT_SETMODE:
    687 		com = VT_SETMODE;
    688 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    689 		    sizeof (struct vt_mode))))
    690 			return error;
    691 		lvt.relsig = linux_to_native_sig[lvt.relsig];
    692 		lvt.acqsig = linux_to_native_sig[lvt.acqsig];
    693 		lvt.frsig = linux_to_native_sig[lvt.frsig];
    694 		sg = stackgap_init(p->p_emul);
    695 		bvtp = stackgap_alloc(&sg, sizeof (struct vt_mode));
    696 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    697 			return error;
    698 		SCARG(&bia, data) = bvtp;
    699 		break;
    700 	case LINUX_VT_DISALLOCATE:
    701 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    702 		return 0;
    703 	case LINUX_VT_RELDISP:
    704 		com = VT_RELDISP;
    705 		break;
    706 	case LINUX_VT_ACTIVATE:
    707 		com = VT_ACTIVATE;
    708 		break;
    709 	case LINUX_VT_WAITACTIVE:
    710 		com = VT_WAITACTIVE;
    711 		break;
    712 	case LINUX_VT_GETSTATE:
    713 		com = VT_GETSTATE;
    714 		break;
    715 	case LINUX_KDGKBTYPE:
    716 		/* This is what Linux does. */
    717 		return (subyte(SCARG(uap, data), KB_101));
    718 	case LINUX_KDGKBENT:
    719 		/*
    720 		 * The Linux KDGKBENT ioctl is different from the
    721 		 * SYSV original. So we handle it in machdep code.
    722 		 * XXX We should use keyboard mapping information
    723 		 * from wsdisplay, but this would be expensive.
    724 		 */
    725 		if ((error = copyin(SCARG(uap, data), &kbe,
    726 				    sizeof(struct kbentry))))
    727 			return (error);
    728 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    729 		    || kbe.kb_index >= NR_KEYS)
    730 			return (EINVAL);
    731 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    732 		return (copyout(&kbe, SCARG(uap, data),
    733 				sizeof(struct kbentry)));
    734 #endif
    735 	case LINUX_HDIO_GETGEO:
    736 	case LINUX_HDIO_GETGEO_BIG:
    737 		/*
    738 		 * Try to mimic Linux behaviour: return the BIOS geometry
    739 		 * if possible (extending its # of cylinders if it's beyond
    740 		 * the 1023 limit), fall back to the MI geometry (i.e.
    741 		 * the real geometry) if not found, by returning an
    742 		 * error. See common/linux_hdio.c
    743 		 */
    744 		FILE_USE(fp);
    745 		bip = fd2biosinfo(p, fp);
    746 		ioctlf = fp->f_ops->fo_ioctl;
    747 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
    748 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
    749 		FILE_UNUSE(fp, p);
    750 		if (error != 0 && error1 != 0)
    751 			return error1;
    752 		labp = error != 0 ? &label : partp.disklab;
    753 		start = error1 != 0 ? partp.part->p_offset : 0;
    754 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    755 		    && bip->bi_cyl != 0) {
    756 			heads = bip->bi_head;
    757 			sectors = bip->bi_sec;
    758 			cylinders = bip->bi_cyl;
    759 			biostotal = heads * sectors * cylinders;
    760 			realtotal = labp->d_ntracks * labp->d_nsectors *
    761 			    labp->d_ncylinders;
    762 			if (realtotal > biostotal)
    763 				cylinders = realtotal / (heads * sectors);
    764 		} else {
    765 			heads = labp->d_ntracks;
    766 			cylinders = labp->d_ncylinders;
    767 			sectors = labp->d_nsectors;
    768 		}
    769 		if (com == LINUX_HDIO_GETGEO) {
    770 			hdg.start = start;
    771 			hdg.heads = heads;
    772 			hdg.cylinders = cylinders;
    773 			hdg.sectors = sectors;
    774 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
    775 		} else {
    776 			hdg_big.start = start;
    777 			hdg_big.heads = heads;
    778 			hdg_big.cylinders = cylinders;
    779 			hdg_big.sectors = sectors;
    780 			return copyout(&hdg_big, SCARG(uap, data),
    781 			    sizeof hdg_big);
    782 		}
    783 		return 0;
    784 
    785 	default:
    786 		/*
    787 		 * Unknown to us. If it's on a device, just pass it through
    788 		 * using PTIOCLINUX, the device itself might be able to
    789 		 * make some sense of it.
    790 		 * XXX hack: if the function returns EJUSTRETURN,
    791 		 * it has stuffed a sysctl return value in pt.data.
    792 		 */
    793 		FILE_USE(fp);
    794 		ioctlf = fp->f_ops->fo_ioctl;
    795 		pt.com = SCARG(uap, com);
    796 		pt.data = SCARG(uap, data);
    797 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
    798 		FILE_UNUSE(fp, p);
    799 		if (error == EJUSTRETURN) {
    800 			retval[0] = (register_t)pt.data;
    801 			error = 0;
    802 		}
    803 
    804 		if (error == ENOTTY)
    805 			printf("linux_machdepioctl: invalid ioctl %08lx\n",
    806 			    com);
    807 		return error;
    808 	}
    809 	SCARG(&bia, com) = com;
    810 	/* XXX NJWLWP */
    811 	return sys_ioctl(curproc, &bia, retval);
    812 }
    813 
    814 /*
    815  * Set I/O permissions for a process. Just set the maximum level
    816  * right away (ignoring the argument), otherwise we would have
    817  * to rely on I/O permission maps, which are not implemented.
    818  */
    819 int
    820 linux_sys_iopl(l, v, retval)
    821 	struct lwp *l;
    822 	void *v;
    823 	register_t *retval;
    824 {
    825 #if 0
    826 	struct linux_sys_iopl_args /* {
    827 		syscallarg(int) level;
    828 	} */ *uap = v;
    829 #endif
    830 	struct proc *p = l->l_proc;
    831 	struct trapframe *fp = l->l_md.md_regs;
    832 
    833 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    834 		return EPERM;
    835 	fp->tf_eflags |= PSL_IOPL;
    836 	*retval = 0;
    837 	return 0;
    838 }
    839 
    840 /*
    841  * See above. If a root process tries to set access to an I/O port,
    842  * just let it have the whole range.
    843  */
    844 int
    845 linux_sys_ioperm(l, v, retval)
    846 	struct lwp *l;
    847 	void *v;
    848 	register_t *retval;
    849 {
    850 	struct linux_sys_ioperm_args /* {
    851 		syscallarg(unsigned int) lo;
    852 		syscallarg(unsigned int) hi;
    853 		syscallarg(int) val;
    854 	} */ *uap = v;
    855 	struct proc *p = l->l_proc;
    856 	struct trapframe *fp = l->l_md.md_regs;
    857 
    858 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    859 		return EPERM;
    860 	if (SCARG(uap, val))
    861 		fp->tf_eflags |= PSL_IOPL;
    862 	*retval = 0;
    863 	return 0;
    864 }
    865