Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.69
      1 /*	$NetBSD: linux_machdep.c,v 1.69 2002/02/15 16:48:00 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.69 2002/02/15 16:48:00 christos Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/map.h>
     52 #include <sys/proc.h>
     53 #include <sys/user.h>
     54 #include <sys/buf.h>
     55 #include <sys/reboot.h>
     56 #include <sys/conf.h>
     57 #include <sys/exec.h>
     58 #include <sys/file.h>
     59 #include <sys/callout.h>
     60 #include <sys/malloc.h>
     61 #include <sys/mbuf.h>
     62 #include <sys/msgbuf.h>
     63 #include <sys/mount.h>
     64 #include <sys/vnode.h>
     65 #include <sys/device.h>
     66 #include <sys/syscallargs.h>
     67 #include <sys/filedesc.h>
     68 #include <sys/exec_elf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/ioctl.h>
     71 #include <miscfs/specfs/specdev.h>
     72 
     73 #include <compat/linux/common/linux_types.h>
     74 #include <compat/linux/common/linux_signal.h>
     75 #include <compat/linux/common/linux_util.h>
     76 #include <compat/linux/common/linux_ioctl.h>
     77 #include <compat/linux/common/linux_hdio.h>
     78 #include <compat/linux/common/linux_exec.h>
     79 #include <compat/linux/common/linux_machdep.h>
     80 
     81 #include <compat/linux/linux_syscallargs.h>
     82 
     83 #include <machine/cpu.h>
     84 #include <machine/cpufunc.h>
     85 #include <machine/psl.h>
     86 #include <machine/reg.h>
     87 #include <machine/segments.h>
     88 #include <machine/specialreg.h>
     89 #include <machine/sysarch.h>
     90 #include <machine/vm86.h>
     91 #include <machine/vmparam.h>
     92 
     93 /*
     94  * To see whether wscons is configured (for virtual console ioctl calls).
     95  */
     96 #if defined(_KERNEL_OPT)
     97 #include "wsdisplay.h"
     98 #endif
     99 #if (NWSDISPLAY > 0)
    100 #include <dev/wscons/wsconsio.h>
    101 #include <dev/wscons/wsdisplay_usl_io.h>
    102 #if defined(_KERNEL_OPT)
    103 #include "opt_xserver.h"
    104 #endif
    105 #endif
    106 
#ifdef USER_LDT
#include <machine/cpu.h>
/*
 * Helpers behind linux_sys_modify_ldt(): func 0 is handled by
 * linux_read_ldt(), func 1 by linux_write_ldt().
 */
int linux_read_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
    register_t *));
int linux_write_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
    register_t *));
#endif

/* Maps an open file to the BIOS disk record of the disk behind it, if any. */
static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
/* Disk list walked by fd2biosinfo(); defined outside this file. */
extern struct disklist *i386_alldisks;
/* Used by fd2biosinfo() to turn a block major number into a name prefix. */
extern const char *findblkname __P((int));
    118 
    119 /*
    120  * Deal with some i386-specific things in the Linux emulation code.
    121  */
    122 
    123 void
    124 linux_setregs(p, epp, stack)
    125 	struct proc *p;
    126 	struct exec_package *epp;
    127 	u_long stack;
    128 {
    129 	struct pcb *pcb = &p->p_addr->u_pcb;
    130 
    131 	setregs(p, epp, stack);
    132 	if (i386_use_fxsave)
    133 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    134 	else
    135 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    136 }
    137 
    138 /*
    139  * Send an interrupt to process.
    140  *
    141  * Stack is set up to allow sigcode stored
    142  * in u. to call routine, followed by kcall
    143  * to sigreturn routine below.  After sigreturn
    144  * resets the signal mask, the stack, and the
    145  * frame pointer, it returns to the user
    146  * specified pc, psl.
    147  */
    148 
    149 void
    150 linux_sendsig(catcher, sig, mask, code)
    151 	sig_t catcher;
    152 	int sig;
    153 	sigset_t *mask;
    154 	u_long code;
    155 {
    156 	struct proc *p = curproc;
    157 	struct trapframe *tf;
    158 	struct linux_sigframe *fp, frame;
    159 	int onstack;
    160 
    161 	tf = p->p_md.md_regs;
    162 
    163 	/* Do we need to jump onto the signal stack? */
    164 	onstack =
    165 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    166 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    167 
    168 	/* Allocate space for the signal handler context. */
    169 	if (onstack)
    170 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
    171 					  p->p_sigctx.ps_sigstk.ss_size);
    172 	else
    173 		fp = (struct linux_sigframe *)tf->tf_esp;
    174 	fp--;
    175 
    176 	/* Build stack frame for signal trampoline. */
    177 	frame.sf_handler = catcher;
    178 	frame.sf_sig = native_to_linux_sig[sig];
    179 
    180 	/* Save register context. */
    181 #ifdef VM86
    182 	if (tf->tf_eflags & PSL_VM) {
    183 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
    184 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
    185 		frame.sf_sc.sc_es = tf->tf_vm86_es;
    186 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
    187 		frame.sf_sc.sc_eflags = get_vflags(p);
    188 	} else
    189 #endif
    190 	{
    191 		frame.sf_sc.sc_gs = tf->tf_gs;
    192 		frame.sf_sc.sc_fs = tf->tf_fs;
    193 		frame.sf_sc.sc_es = tf->tf_es;
    194 		frame.sf_sc.sc_ds = tf->tf_ds;
    195 		frame.sf_sc.sc_eflags = tf->tf_eflags;
    196 	}
    197 	frame.sf_sc.sc_edi = tf->tf_edi;
    198 	frame.sf_sc.sc_esi = tf->tf_esi;
    199 	frame.sf_sc.sc_ebp = tf->tf_ebp;
    200 	frame.sf_sc.sc_ebx = tf->tf_ebx;
    201 	frame.sf_sc.sc_edx = tf->tf_edx;
    202 	frame.sf_sc.sc_ecx = tf->tf_ecx;
    203 	frame.sf_sc.sc_eax = tf->tf_eax;
    204 	frame.sf_sc.sc_eip = tf->tf_eip;
    205 	frame.sf_sc.sc_cs = tf->tf_cs;
    206 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
    207 	frame.sf_sc.sc_ss = tf->tf_ss;
    208 	frame.sf_sc.sc_err = tf->tf_err;
    209 	frame.sf_sc.sc_trapno = tf->tf_trapno;
    210 
    211 	/* Save signal stack. */
    212 	/* Linux doesn't save the onstack flag in sigframe */
    213 
    214 	/* Save signal mask. */
    215 	native_to_linux_old_sigset(&frame.sf_sc.sc_mask, mask);
    216 
    217 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    218 		/*
    219 		 * Process has trashed its stack; give it an illegal
    220 		 * instruction to halt it in its tracks.
    221 		 */
    222 		sigexit(p, SIGILL);
    223 		/* NOTREACHED */
    224 	}
    225 
    226 	/*
    227 	 * Build context to run handler in.
    228 	 */
    229 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    230 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    231 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    232 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    233 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    234 	tf->tf_esp = (int)fp;
    235 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    236 
    237 	/* Remember that we're now on the signal stack. */
    238 	if (onstack)
    239 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    240 }
    241 
/*
 * System call to clean up state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 */
/*
 * Linux rt_sigreturn entry point.  Not implemented: the call always
 * fails with ENOSYS and none of the arguments are examined.
 */
int
linux_sys_rt_sigreturn(p, v, retval)
	struct proc *p;
	void *v;
	register_t *retval;
{
	const int error = ENOSYS;	/* XXX XAX write me */

	return (error);
}
    261 
    262 int
    263 linux_sys_sigreturn(p, v, retval)
    264 	struct proc *p;
    265 	void *v;
    266 	register_t *retval;
    267 {
    268 	struct linux_sys_sigreturn_args /* {
    269 		syscallarg(struct linux_sigcontext *) scp;
    270 	} */ *uap = v;
    271 	struct linux_sigcontext *scp, context;
    272 	struct trapframe *tf;
    273 	sigset_t mask;
    274 	ssize_t ss_gap;
    275 
    276 	/*
    277 	 * The trampoline code hands us the context.
    278 	 * It is unsafe to keep track of it ourselves, in the event that a
    279 	 * program jumps out of a signal handler.
    280 	 */
    281 	scp = SCARG(uap, scp);
    282 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
    283 		return (EFAULT);
    284 
    285 	/* Restore register context. */
    286 	tf = p->p_md.md_regs;
    287 #ifdef VM86
    288 	if (context.sc_eflags & PSL_VM) {
    289 		tf->tf_vm86_gs = context.sc_gs;
    290 		tf->tf_vm86_fs = context.sc_fs;
    291 		tf->tf_vm86_es = context.sc_es;
    292 		tf->tf_vm86_ds = context.sc_ds;
    293 		set_vflags(p, context.sc_eflags);
    294 	} else
    295 #endif
    296 	{
    297 		/*
    298 		 * Check for security violations.  If we're returning to
    299 		 * protected mode, the CPU will validate the segment registers
    300 		 * automatically and generate a trap on violations.  We handle
    301 		 * the trap, rather than doing all of the checking here.
    302 		 */
    303 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    304 		    !USERMODE(context.sc_cs, context.sc_eflags))
    305 			return (EINVAL);
    306 
    307 		/* %fs and %gs were restored by the trampoline. */
    308 		tf->tf_es = context.sc_es;
    309 		tf->tf_ds = context.sc_ds;
    310 		tf->tf_eflags = context.sc_eflags;
    311 	}
    312 	tf->tf_edi = context.sc_edi;
    313 	tf->tf_esi = context.sc_esi;
    314 	tf->tf_ebp = context.sc_ebp;
    315 	tf->tf_ebx = context.sc_ebx;
    316 	tf->tf_edx = context.sc_edx;
    317 	tf->tf_ecx = context.sc_ecx;
    318 	tf->tf_eax = context.sc_eax;
    319 	tf->tf_eip = context.sc_eip;
    320 	tf->tf_cs = context.sc_cs;
    321 	tf->tf_esp = context.sc_esp_at_signal;
    322 	tf->tf_ss = context.sc_ss;
    323 
    324 	/* Restore signal stack. */
    325 	/*
    326 	 * Linux really does it this way; it doesn't have space in sigframe
    327 	 * to save the onstack flag.
    328 	 */
    329 	ss_gap = (ssize_t)
    330 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
    331 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
    332 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    333 	else
    334 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
    335 
    336 	/* Restore signal mask. */
    337 	linux_old_to_native_sigset(&mask, &context.sc_mask);
    338 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    339 
    340 	return (EJUSTRETURN);
    341 }
    342 
    343 #ifdef USER_LDT
    344 
    345 int
    346 linux_read_ldt(p, uap, retval)
    347 	struct proc *p;
    348 	struct linux_sys_modify_ldt_args /* {
    349 		syscallarg(int) func;
    350 		syscallarg(void *) ptr;
    351 		syscallarg(size_t) bytecount;
    352 	} */ *uap;
    353 	register_t *retval;
    354 {
    355 	struct i386_get_ldt_args gl;
    356 	int error;
    357 	caddr_t sg;
    358 	char *parms;
    359 
    360 	sg = stackgap_init(p->p_emul);
    361 
    362 	gl.start = 0;
    363 	gl.desc = SCARG(uap, ptr);
    364 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    365 
    366 	parms = stackgap_alloc(&sg, sizeof(gl));
    367 
    368 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    369 		return (error);
    370 
    371 	if ((error = i386_get_ldt(p, parms, retval)) != 0)
    372 		return (error);
    373 
    374 	*retval *= sizeof(union descriptor);
    375 	return (0);
    376 }
    377 
/*
 * Record copied in from user space by linux_write_ldt(); its size must
 * equal the bytecount argument of the modify_ldt call.
 */
struct linux_ldt_info {
	u_int entry_number;		/* LDT slot to set (becomes sl.start) */
	u_long base_addr;		/* segment base; split into lo/hi base */
	u_int limit;			/* segment limit; low 20 bits are used */
	u_int seg_32bit:1;		/* copied to sd_def32 */
	u_int contents:2;		/* shifted into sd_type; 3 is rejected */
	u_int read_exec_only:1;		/* inverted into bit 1 of sd_type */
	u_int limit_in_pages:1;		/* copied to sd_gran */
	u_int seg_not_present:1;	/* inverted into sd_p */
};
    388 
    389 int
    390 linux_write_ldt(p, uap, retval)
    391 	struct proc *p;
    392 	struct linux_sys_modify_ldt_args /* {
    393 		syscallarg(int) func;
    394 		syscallarg(void *) ptr;
    395 		syscallarg(size_t) bytecount;
    396 	} */ *uap;
    397 	register_t *retval;
    398 {
    399 	struct linux_ldt_info ldt_info;
    400 	struct segment_descriptor sd;
    401 	struct i386_set_ldt_args sl;
    402 	int error;
    403 	caddr_t sg;
    404 	char *parms;
    405 
    406 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    407 		return (EINVAL);
    408 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    409 		return error;
    410 	if (ldt_info.contents == 3)
    411 		return (EINVAL);
    412 
    413 	sg = stackgap_init(p->p_emul);
    414 
    415 	sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    416 	sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    417 	sd.sd_lolimit = ldt_info.limit & 0xffff;
    418 	sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    419 	sd.sd_type =
    420 	    16 | (ldt_info.contents << 2) | (!ldt_info.read_exec_only << 1);
    421 	sd.sd_dpl = SEL_UPL;
    422 	sd.sd_p = !ldt_info.seg_not_present;
    423 	sd.sd_def32 = ldt_info.seg_32bit;
    424 	sd.sd_gran = ldt_info.limit_in_pages;
    425 
    426 	sl.start = ldt_info.entry_number;
    427 	sl.desc = stackgap_alloc(&sg, sizeof(sd));
    428 	sl.num = 1;
    429 
    430 #if 0
    431 	printf("linux_write_ldt: idx=%d, base=%x, limit=%x\n",
    432 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit);
    433 #endif
    434 
    435 	parms = stackgap_alloc(&sg, sizeof(sl));
    436 
    437 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    438 		return (error);
    439 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    440 		return (error);
    441 
    442 	if ((error = i386_set_ldt(p, parms, retval)) != 0)
    443 		return (error);
    444 
    445 	*retval = 0;
    446 	return (0);
    447 }
    448 
    449 #endif /* USER_LDT */
    450 
    451 int
    452 linux_sys_modify_ldt(p, v, retval)
    453 	struct proc *p;
    454 	void *v;
    455 	register_t *retval;
    456 {
    457 	struct linux_sys_modify_ldt_args /* {
    458 		syscallarg(int) func;
    459 		syscallarg(void *) ptr;
    460 		syscallarg(size_t) bytecount;
    461 	} */ *uap = v;
    462 
    463 	switch (SCARG(uap, func)) {
    464 #ifdef USER_LDT
    465 	case 0:
    466 		return (linux_read_ldt(p, uap, retval));
    467 
    468 	case 1:
    469 		return (linux_write_ldt(p, uap, retval));
    470 #endif /* USER_LDT */
    471 
    472 	default:
    473 		return (ENOSYS);
    474 	}
    475 }
    476 
    477 /*
    478  * XXX Pathetic hack to make svgalib work. This will fake the major
    479  * device number of an opened VT so that svgalib likes it. grmbl.
    480  * Should probably do it 'wrong the right way' and use a mapping
    481  * array for all major device numbers, and map linux_mknod too.
    482  */
/*
 * Translate a device number for presentation to a Linux program.
 *
 * When raw is zero, dev is returned unchanged.  Otherwise, with wscons
 * configured, a device whose major is NETBSD_WSCONS_MAJOR is mapped to
 * LINUX_CONS_MAJOR with its minor number incremented by one; every
 * other raw device yields 0.
 */
dev_t
linux_fakedev(dev, raw)
	dev_t dev;
	int raw;
{
	if (!raw)
		return dev;

#if (NWSDISPLAY > 0)
	if (major(dev) == NETBSD_WSCONS_MAJOR)
		return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
#endif
	return 0;
}
    498 
    499 #if (NWSDISPLAY > 0)
    500 /*
    501  * That's not complete, but enough to get an X server running.
    502  */
/* Number of entries in each key map below. */
#define NR_KEYS 128
/*
 * Fixed key maps returned by the LINUX_KDGKBENT ioctl, one u_short per
 * key index: plain_map, shift_map, altgr_map and ctrl_map.  They are
 * reached through linux_keytabs[].
 * NOTE(review): the values look like Linux keymap codes (key type in
 * the high byte, value in the low byte) -- confirm against the Linux
 * keyboard headers before editing entries.
 */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};
    573 
/*
 * Key maps selectable through the LINUX_KDGKBENT ioctl, indexed by the
 * kb_table value supplied by the caller (bounds-checked there against
 * the number of entries in this array).  Slots 2 and 3 both refer to
 * altgr_map.
 */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
    577 #endif
    578 
    579 static struct biosdisk_info *
    580 fd2biosinfo(p, fp)
    581 	struct proc *p;
    582 	struct file *fp;
    583 {
    584 	struct vnode *vp;
    585 	const char *blkname;
    586 	char diskname[16];
    587 	int i;
    588 	struct nativedisk_info *nip;
    589 	struct disklist *dl = i386_alldisks;
    590 
    591 	if (fp->f_type != DTYPE_VNODE)
    592 		return NULL;
    593 	vp = (struct vnode *)fp->f_data;
    594 
    595 	if (vp->v_type != VBLK)
    596 		return NULL;
    597 
    598 	blkname = findblkname(major(vp->v_rdev));
    599 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    600 	    DISKUNIT(vp->v_rdev));
    601 
    602 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    603 		nip = &dl->dl_nativedisks[i];
    604 		if (strcmp(diskname, nip->ni_devname))
    605 			continue;
    606 		if (nip->ni_nmatches != 0)
    607 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    608 	}
    609 
    610 	return NULL;
    611 }
    612 
    613 
    614 /*
    615  * We come here in a last attempt to satisfy a Linux ioctl() call
    616  */
    617 int
    618 linux_machdepioctl(p, v, retval)
    619 	struct proc *p;
    620 	void *v;
    621 	register_t *retval;
    622 {
    623 	struct linux_sys_ioctl_args /* {
    624 		syscallarg(int) fd;
    625 		syscallarg(u_long) com;
    626 		syscallarg(caddr_t) data;
    627 	} */ *uap = v;
    628 	struct sys_ioctl_args bia;
    629 	u_long com;
    630 	int error, error1;
    631 #if (NWSDISPLAY > 0)
    632 	struct vt_mode lvt;
    633 	caddr_t bvtp, sg;
    634 	struct kbentry kbe;
    635 #endif
    636 	struct linux_hd_geometry hdg;
    637 	struct linux_hd_big_geometry hdg_big;
    638 	struct biosdisk_info *bip;
    639 	struct filedesc *fdp;
    640 	struct file *fp;
    641 	int fd;
    642 	struct disklabel label, *labp;
    643 	struct partinfo partp;
    644 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
    645 	u_long start, biostotal, realtotal;
    646 	u_char heads, sectors;
    647 	u_int cylinders;
    648 	struct ioctl_pt pt;
    649 
    650 	fd = SCARG(uap, fd);
    651 	SCARG(&bia, fd) = fd;
    652 	SCARG(&bia, data) = SCARG(uap, data);
    653 	com = SCARG(uap, com);
    654 
    655 	fdp = p->p_fd;
    656 
    657 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    658 		return (EBADF);
    659 
    660 	switch (com) {
    661 #if (NWSDISPLAY > 0)
    662 	case LINUX_KDGKBMODE:
    663 		com = KDGKBMODE;
    664 		break;
    665 	case LINUX_KDSKBMODE:
    666 		com = KDSKBMODE;
    667 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    668 			SCARG(&bia, data) = (caddr_t)K_RAW;
    669 		break;
    670 	case LINUX_KIOCSOUND:
    671 		SCARG(&bia, data) =
    672 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    673 		/* fall through */
    674 	case LINUX_KDMKTONE:
    675 		com = KDMKTONE;
    676 		break;
    677 	case LINUX_KDSETMODE:
    678 		com = KDSETMODE;
    679 		break;
    680 	case LINUX_KDGETMODE:
    681 		/* KD_* values are equal to the wscons numbers */
    682 		com = WSDISPLAYIO_GMODE;
    683 		break;
    684 	case LINUX_KDENABIO:
    685 		com = KDENABIO;
    686 		break;
    687 	case LINUX_KDDISABIO:
    688 		com = KDDISABIO;
    689 		break;
    690 	case LINUX_KDGETLED:
    691 		com = KDGETLED;
    692 		break;
    693 	case LINUX_KDSETLED:
    694 		com = KDSETLED;
    695 		break;
    696 	case LINUX_VT_OPENQRY:
    697 		com = VT_OPENQRY;
    698 		break;
    699 	case LINUX_VT_GETMODE:
    700 		SCARG(&bia, com) = VT_GETMODE;
    701 		if ((error = sys_ioctl(p, &bia, retval)))
    702 			return error;
    703 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    704 		    sizeof (struct vt_mode))))
    705 			return error;
    706 		lvt.relsig = native_to_linux_sig[lvt.relsig];
    707 		lvt.acqsig = native_to_linux_sig[lvt.acqsig];
    708 		lvt.frsig = native_to_linux_sig[lvt.frsig];
    709 		return copyout((caddr_t)&lvt, SCARG(uap, data),
    710 		    sizeof (struct vt_mode));
    711 	case LINUX_VT_SETMODE:
    712 		com = VT_SETMODE;
    713 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    714 		    sizeof (struct vt_mode))))
    715 			return error;
    716 		lvt.relsig = linux_to_native_sig[lvt.relsig];
    717 		lvt.acqsig = linux_to_native_sig[lvt.acqsig];
    718 		lvt.frsig = linux_to_native_sig[lvt.frsig];
    719 		sg = stackgap_init(p->p_emul);
    720 		bvtp = stackgap_alloc(&sg, sizeof (struct vt_mode));
    721 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    722 			return error;
    723 		SCARG(&bia, data) = bvtp;
    724 		break;
    725 	case LINUX_VT_DISALLOCATE:
    726 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    727 		return 0;
    728 	case LINUX_VT_RELDISP:
    729 		com = VT_RELDISP;
    730 		break;
    731 	case LINUX_VT_ACTIVATE:
    732 		com = VT_ACTIVATE;
    733 		break;
    734 	case LINUX_VT_WAITACTIVE:
    735 		com = VT_WAITACTIVE;
    736 		break;
    737 	case LINUX_VT_GETSTATE:
    738 		com = VT_GETSTATE;
    739 		break;
    740 	case LINUX_KDGKBTYPE:
    741 		/* This is what Linux does. */
    742 		return (subyte(SCARG(uap, data), KB_101));
    743 	case LINUX_KDGKBENT:
    744 		/*
    745 		 * The Linux KDGKBENT ioctl is different from the
    746 		 * SYSV original. So we handle it in machdep code.
    747 		 * XXX We should use keyboard mapping information
    748 		 * from wsdisplay, but this would be expensive.
    749 		 */
    750 		if ((error = copyin(SCARG(uap, data), &kbe,
    751 				    sizeof(struct kbentry))))
    752 			return (error);
    753 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    754 		    || kbe.kb_index >= NR_KEYS)
    755 			return (EINVAL);
    756 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    757 		return (copyout(&kbe, SCARG(uap, data),
    758 				sizeof(struct kbentry)));
    759 #endif
    760 	case LINUX_HDIO_GETGEO:
    761 	case LINUX_HDIO_GETGEO_BIG:
    762 		/*
    763 		 * Try to mimic Linux behaviour: return the BIOS geometry
    764 		 * if possible (extending its # of cylinders if it's beyond
    765 		 * the 1023 limit), fall back to the MI geometry (i.e.
    766 		 * the real geometry) if not found, by returning an
    767 		 * error. See common/linux_hdio.c
    768 		 */
    769 		FILE_USE(fp);
    770 		bip = fd2biosinfo(p, fp);
    771 		ioctlf = fp->f_ops->fo_ioctl;
    772 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
    773 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
    774 		FILE_UNUSE(fp, p);
    775 		if (error != 0 && error1 != 0)
    776 			return error1;
    777 		labp = error != 0 ? &label : partp.disklab;
    778 		start = error1 != 0 ? partp.part->p_offset : 0;
    779 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    780 		    && bip->bi_cyl != 0) {
    781 			heads = bip->bi_head;
    782 			sectors = bip->bi_sec;
    783 			cylinders = bip->bi_cyl;
    784 			biostotal = heads * sectors * cylinders;
    785 			realtotal = labp->d_ntracks * labp->d_nsectors *
    786 			    labp->d_ncylinders;
    787 			if (realtotal > biostotal)
    788 				cylinders = realtotal / (heads * sectors);
    789 		} else {
    790 			heads = labp->d_ntracks;
    791 			cylinders = labp->d_ncylinders;
    792 			sectors = labp->d_nsectors;
    793 		}
    794 		if (com == LINUX_HDIO_GETGEO) {
    795 			hdg.start = start;
    796 			hdg.heads = heads;
    797 			hdg.cylinders = cylinders;
    798 			hdg.sectors = sectors;
    799 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
    800 		} else {
    801 			hdg_big.start = start;
    802 			hdg_big.heads = heads;
    803 			hdg_big.cylinders = cylinders;
    804 			hdg_big.sectors = sectors;
    805 			return copyout(&hdg_big, SCARG(uap, data),
    806 			    sizeof hdg_big);
    807 		}
    808 		return 0;
    809 
    810 	default:
    811 		/*
    812 		 * Unknown to us. If it's on a device, just pass it through
    813 		 * using PTIOCLINUX, the device itself might be able to
    814 		 * make some sense of it.
    815 		 * XXX hack: if the function returns EJUSTRETURN,
    816 		 * it has stuffed a sysctl return value in pt.data.
    817 		 */
    818 		FILE_USE(fp);
    819 		ioctlf = fp->f_ops->fo_ioctl;
    820 		pt.com = SCARG(uap, com);
    821 		pt.data = SCARG(uap, data);
    822 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
    823 		FILE_UNUSE(fp, p);
    824 		if (error == EJUSTRETURN) {
    825 			retval[0] = (register_t)pt.data;
    826 			error = 0;
    827 		}
    828 
    829 		if (error == ENOTTY)
    830 			printf("linux_machdepioctl: invalid ioctl %08lx\n",
    831 			    com);
    832 		return error;
    833 	}
    834 	SCARG(&bia, com) = com;
    835 	return sys_ioctl(p, &bia, retval);
    836 }
    837 
    838 /*
    839  * Set I/O permissions for a process. Just set the maximum level
    840  * right away (ignoring the argument), otherwise we would have
    841  * to rely on I/O permission maps, which are not implemented.
    842  */
    843 int
    844 linux_sys_iopl(p, v, retval)
    845 	struct proc *p;
    846 	void *v;
    847 	register_t *retval;
    848 {
    849 #if 0
    850 	struct linux_sys_iopl_args /* {
    851 		syscallarg(int) level;
    852 	} */ *uap = v;
    853 #endif
    854 	struct trapframe *fp = p->p_md.md_regs;
    855 
    856 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    857 		return EPERM;
    858 	fp->tf_eflags |= PSL_IOPL;
    859 	*retval = 0;
    860 	return 0;
    861 }
    862 
    863 /*
    864  * See above. If a root process tries to set access to an I/O port,
    865  * just let it have the whole range.
    866  */
    867 int
    868 linux_sys_ioperm(p, v, retval)
    869 	struct proc *p;
    870 	void *v;
    871 	register_t *retval;
    872 {
    873 	struct linux_sys_ioperm_args /* {
    874 		syscallarg(unsigned int) lo;
    875 		syscallarg(unsigned int) hi;
    876 		syscallarg(int) val;
    877 	} */ *uap = v;
    878 	struct trapframe *fp = p->p_md.md_regs;
    879 
    880 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    881 		return EPERM;
    882 	if (SCARG(uap, val))
    883 		fp->tf_eflags |= PSL_IOPL;
    884 	*retval = 0;
    885 	return 0;
    886 }
    887