Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.62.2.3
      1 /*	$NetBSD: linux_machdep.c,v 1.62.2.3 2001/08/24 00:08:48 nathanw Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #if defined(_KERNEL_OPT)
     40 #include "opt_vm86.h"
     41 #include "opt_user_ldt.h"
     42 #endif
     43 
     44 #include <sys/param.h>
     45 #include <sys/systm.h>
     46 #include <sys/signalvar.h>
     47 #include <sys/kernel.h>
     48 #include <sys/map.h>
     49 #include <sys/lwp.h>
     50 #include <sys/proc.h>
     51 #include <sys/user.h>
     52 #include <sys/buf.h>
     53 #include <sys/reboot.h>
     54 #include <sys/conf.h>
     55 #include <sys/exec.h>
     56 #include <sys/file.h>
     57 #include <sys/callout.h>
     58 #include <sys/malloc.h>
     59 #include <sys/mbuf.h>
     60 #include <sys/msgbuf.h>
     61 #include <sys/mount.h>
     62 #include <sys/vnode.h>
     63 #include <sys/device.h>
     64 #include <sys/syscallargs.h>
     65 #include <sys/filedesc.h>
     66 #include <sys/exec_elf.h>
     67 #include <sys/disklabel.h>
     68 #include <sys/ioctl.h>
     69 #include <miscfs/specfs/specdev.h>
     70 
     71 #include <compat/linux/common/linux_types.h>
     72 #include <compat/linux/common/linux_signal.h>
     73 #include <compat/linux/common/linux_util.h>
     74 #include <compat/linux/common/linux_ioctl.h>
     75 #include <compat/linux/common/linux_hdio.h>
     76 #include <compat/linux/common/linux_exec.h>
     77 #include <compat/linux/common/linux_machdep.h>
     78 
     79 #include <compat/linux/linux_syscallargs.h>
     80 
     81 #include <machine/cpu.h>
     82 #include <machine/cpufunc.h>
     83 #include <machine/psl.h>
     84 #include <machine/reg.h>
     85 #include <machine/segments.h>
     86 #include <machine/specialreg.h>
     87 #include <machine/sysarch.h>
     88 #include <machine/vm86.h>
     89 #include <machine/vmparam.h>
     90 
     91 /*
     92  * To see whether wscons is configured (for virtual console ioctl calls).
     93  */
     94 #if defined(_KERNEL_OPT)
     95 #include "wsdisplay.h"
     96 #endif
     97 #if (NWSDISPLAY > 0)
     98 #include <dev/wscons/wsconsio.h>
     99 #include <dev/wscons/wsdisplay_usl_io.h>
    100 #if defined(_KERNEL_OPT)
    101 #include "opt_xserver.h"
    102 #endif
    103 #endif
    104 
    105 #ifdef USER_LDT
    106 #include <machine/cpu.h>
    107 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    108     register_t *));
    109 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    110     register_t *));
    111 #endif
    112 
    113 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    114 extern struct disklist *i386_alldisks;
    115 extern const char *findblkname __P((int));
    116 
    117 /*
    118  * Deal with some i386-specific things in the Linux emulation code.
    119  */
    120 
    121 void
    122 linux_setregs(l, epp, stack)
    123 	struct lwp *l;
    124 	struct exec_package *epp;
    125 	u_long stack;
    126 {
    127 	struct pcb *pcb = &l->l_addr->u_pcb;
    128 
    129 	setregs(l, epp, stack);
    130 	if (i386_use_fxsave)
    131 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    132 	else
    133 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    134 }
    135 
    136 /*
    137  * Send an interrupt to process.
    138  *
    139  * Stack is set up to allow sigcode stored
    140  * in u. to call routine, followed by kcall
    141  * to sigreturn routine below.  After sigreturn
    142  * resets the signal mask, the stack, and the
    143  * frame pointer, it returns to the user
    144  * specified pc, psl.
    145  */
    146 
    147 void
    148 linux_sendsig(catcher, sig, mask, code)
    149 	sig_t catcher;
    150 	int sig;
    151 	sigset_t *mask;
    152 	u_long code;
    153 {
    154 	struct lwp *l = curproc;
    155 	struct proc *p = l->l_proc;
    156 	struct trapframe *tf;
    157 	struct linux_sigframe *fp, frame;
    158 	int onstack;
    159 
    160 	tf = l->l_md.md_regs;
    161 	/* Do we need to jump onto the signal stack? */
    162 	onstack =
    163 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    164 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    165 
    166 	/* Allocate space for the signal handler context. */
    167 	if (onstack)
    168 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
    169 					  p->p_sigctx.ps_sigstk.ss_size);
    170 	else
    171 		fp = (struct linux_sigframe *)tf->tf_esp;
    172 	fp--;
    173 
    174 	/* Build stack frame for signal trampoline. */
    175 	frame.sf_handler = catcher;
    176 	frame.sf_sig = native_to_linux_sig[sig];
    177 
    178 	/* Save register context. */
    179 #ifdef VM86
    180 	if (tf->tf_eflags & PSL_VM) {
    181 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
    182 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
    183 		frame.sf_sc.sc_es = tf->tf_vm86_es;
    184 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
    185 		frame.sf_sc.sc_eflags = get_vflags(l);
    186 	} else
    187 #endif
    188 	{
    189 		frame.sf_sc.sc_gs = tf->tf_gs;
    190 		frame.sf_sc.sc_fs = tf->tf_fs;
    191 		frame.sf_sc.sc_es = tf->tf_es;
    192 		frame.sf_sc.sc_ds = tf->tf_ds;
    193 		frame.sf_sc.sc_eflags = tf->tf_eflags;
    194 	}
    195 	frame.sf_sc.sc_edi = tf->tf_edi;
    196 	frame.sf_sc.sc_esi = tf->tf_esi;
    197 	frame.sf_sc.sc_ebp = tf->tf_ebp;
    198 	frame.sf_sc.sc_ebx = tf->tf_ebx;
    199 	frame.sf_sc.sc_edx = tf->tf_edx;
    200 	frame.sf_sc.sc_ecx = tf->tf_ecx;
    201 	frame.sf_sc.sc_eax = tf->tf_eax;
    202 	frame.sf_sc.sc_eip = tf->tf_eip;
    203 	frame.sf_sc.sc_cs = tf->tf_cs;
    204 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
    205 	frame.sf_sc.sc_ss = tf->tf_ss;
    206 	frame.sf_sc.sc_err = tf->tf_err;
    207 	frame.sf_sc.sc_trapno = tf->tf_trapno;
    208 
    209 	/* Save signal stack. */
    210 	/* Linux doesn't save the onstack flag in sigframe */
    211 
    212 	/* Save signal mask. */
    213 	native_to_linux_old_sigset(mask, &frame.sf_sc.sc_mask);
    214 
    215 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    216 		/*
    217 		 * Process has trashed its stack; give it an illegal
    218 		 * instruction to halt it in its tracks.
    219 		 */
    220 		sigexit(l, SIGILL);
    221 		/* NOTREACHED */
    222 	}
    223 
    224 	/*
    225 	 * Build context to run handler in.
    226 	 */
    227 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    228 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    229 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    230 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    231 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    232 	tf->tf_esp = (int)fp;
    233 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    234 
    235 	/* Remember that we're now on the signal stack. */
    236 	if (onstack)
    237 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    238 }
    239 
    240 /*
    241  * System call to cleanup state after a signal
    242  * has been taken.  Reset signal mask and
    243  * stack state from context left by sendsig (above).
    244  * Return to previous pc and psl as specified by
    245  * context left by sendsig. Check carefully to
    246  * make sure that the user has not modified the
    247  * psl to gain improper privileges or to cause
    248  * a machine fault.
    249  */
    250 int
    251 linux_sys_rt_sigreturn(l, v, retval)
    252 	struct lwp *l;
    253 	void *v;
    254 	register_t *retval;
    255 {
    256 	/* XXX XAX write me */
    257 	return(ENOSYS);
    258 }
    259 
    260 int
    261 linux_sys_sigreturn(l, v, retval)
    262 	struct lwp *l;
    263 	void *v;
    264 	register_t *retval;
    265 {
    266 	struct linux_sys_sigreturn_args /* {
    267 		syscallarg(struct linux_sigcontext *) scp;
    268 	} */ *uap = v;
    269 	struct proc *p = l->l_proc;
    270 	struct linux_sigcontext *scp, context;
    271 	struct trapframe *tf;
    272 	sigset_t mask;
    273 	ssize_t ss_gap;
    274 
    275 	/*
    276 	 * The trampoline code hands us the context.
    277 	 * It is unsafe to keep track of it ourselves, in the event that a
    278 	 * program jumps out of a signal handler.
    279 	 */
    280 	scp = SCARG(uap, scp);
    281 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
    282 		return (EFAULT);
    283 
    284 	/* Restore register context. */
    285 	tf = l->l_md.md_regs;
    286 #ifdef VM86
    287 	if (context.sc_eflags & PSL_VM) {
    288 		tf->tf_vm86_gs = context.sc_gs;
    289 		tf->tf_vm86_fs = context.sc_fs;
    290 		tf->tf_vm86_es = context.sc_es;
    291 		tf->tf_vm86_ds = context.sc_ds;
    292 		set_vflags(l, context.sc_eflags);
    293 	} else
    294 #endif
    295 	{
    296 		/*
    297 		 * Check for security violations.  If we're returning to
    298 		 * protected mode, the CPU will validate the segment registers
    299 		 * automatically and generate a trap on violations.  We handle
    300 		 * the trap, rather than doing all of the checking here.
    301 		 */
    302 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    303 		    !USERMODE(context.sc_cs, context.sc_eflags))
    304 			return (EINVAL);
    305 
    306 		/* %fs and %gs were restored by the trampoline. */
    307 		tf->tf_es = context.sc_es;
    308 		tf->tf_ds = context.sc_ds;
    309 		tf->tf_eflags = context.sc_eflags;
    310 	}
    311 	tf->tf_edi = context.sc_edi;
    312 	tf->tf_esi = context.sc_esi;
    313 	tf->tf_ebp = context.sc_ebp;
    314 	tf->tf_ebx = context.sc_ebx;
    315 	tf->tf_edx = context.sc_edx;
    316 	tf->tf_ecx = context.sc_ecx;
    317 	tf->tf_eax = context.sc_eax;
    318 	tf->tf_eip = context.sc_eip;
    319 	tf->tf_cs = context.sc_cs;
    320 	tf->tf_esp = context.sc_esp_at_signal;
    321 	tf->tf_ss = context.sc_ss;
    322 
    323 	/* Restore signal stack. */
    324 	/*
    325 	 * Linux really does it this way; it doesn't have space in sigframe
    326 	 * to save the onstack flag.
    327 	 */
    328 	ss_gap = (ssize_t)
    329 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
    330 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
    331 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    332 	else
    333 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
    334 
    335 	/* Restore signal mask. */
    336 	linux_old_to_native_sigset(&context.sc_mask, &mask);
    337 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    338 
    339 	return (EJUSTRETURN);
    340 }
    341 
    342 #ifdef USER_LDT
    343 
    344 int
    345 linux_read_ldt(l, uap, retval)
    346 	struct lwp *l;
    347 	struct linux_sys_modify_ldt_args /* {
    348 		syscallarg(int) func;
    349 		syscallarg(void *) ptr;
    350 		syscallarg(size_t) bytecount;
    351 	} */ *uap;
    352 	register_t *retval;
    353 {
    354 	struct proc *p = l->l_proc;
    355 	struct i386_get_ldt_args gl;
    356 	int error;
    357 	caddr_t sg;
    358 	char *parms;
    359 
    360 	sg = stackgap_init(p->p_emul);
    361 
    362 	gl.start = 0;
    363 	gl.desc = SCARG(uap, ptr);
    364 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    365 
    366 	parms = stackgap_alloc(&sg, sizeof(gl));
    367 
    368 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    369 		return (error);
    370 
    371 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    372 		return (error);
    373 
    374 	*retval *= sizeof(union descriptor);
    375 	return (0);
    376 }
    377 
/*
 * Request block copied in from the Linux process by linux_write_ldt(),
 * which converts it into a native segment descriptor.
 */
struct linux_ldt_info {
	u_int entry_number;		/* LDT slot to set (used as sl.start) */
	u_long base_addr;		/* segment base, split into sd_lobase/sd_hibase */
	u_int limit;			/* segment limit, split into sd_lolimit/sd_hilimit */
	u_int seg_32bit:1;		/* copied to sd_def32 */
	u_int contents:2;		/* folded into sd_type; the value 3 is rejected */
	u_int read_exec_only:1;		/* inverted into bit 1 of sd_type */
	u_int limit_in_pages:1;		/* copied to sd_gran */
	u_int seg_not_present:1;	/* inverted into sd_p */
};
    388 
    389 int
    390 linux_write_ldt(l, uap, retval)
    391 	struct lwp *l;
    392 	struct linux_sys_modify_ldt_args /* {
    393 		syscallarg(int) func;
    394 		syscallarg(void *) ptr;
    395 		syscallarg(size_t) bytecount;
    396 	} */ *uap;
    397 	register_t *retval;
    398 {
    399 	struct proc *p = l->l_proc;
    400 	struct linux_ldt_info ldt_info;
    401 	struct segment_descriptor sd;
    402 	struct i386_set_ldt_args sl;
    403 	int error;
    404 	caddr_t sg;
    405 	char *parms;
    406 
    407 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    408 		return (EINVAL);
    409 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    410 		return error;
    411 	if (ldt_info.contents == 3)
    412 		return (EINVAL);
    413 
    414 	sg = stackgap_init(p->p_emul);
    415 
    416 	sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    417 	sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    418 	sd.sd_lolimit = ldt_info.limit & 0xffff;
    419 	sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    420 	sd.sd_type =
    421 	    16 | (ldt_info.contents << 2) | (!ldt_info.read_exec_only << 1);
    422 	sd.sd_dpl = SEL_UPL;
    423 	sd.sd_p = !ldt_info.seg_not_present;
    424 	sd.sd_def32 = ldt_info.seg_32bit;
    425 	sd.sd_gran = ldt_info.limit_in_pages;
    426 
    427 	sl.start = ldt_info.entry_number;
    428 	sl.desc = stackgap_alloc(&sg, sizeof(sd));
    429 	sl.num = 1;
    430 
    431 #if 0
    432 	printf("linux_write_ldt: idx=%d, base=%x, limit=%x\n",
    433 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit);
    434 #endif
    435 
    436 	parms = stackgap_alloc(&sg, sizeof(sl));
    437 
    438 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    439 		return (error);
    440 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    441 		return (error);
    442 
    443 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    444 		return (error);
    445 
    446 	*retval = 0;
    447 	return (0);
    448 }
    449 
    450 #endif /* USER_LDT */
    451 
    452 int
    453 linux_sys_modify_ldt(l, v, retval)
    454 	struct lwp *l;
    455 	void *v;
    456 	register_t *retval;
    457 {
    458 	struct linux_sys_modify_ldt_args /* {
    459 		syscallarg(int) func;
    460 		syscallarg(void *) ptr;
    461 		syscallarg(size_t) bytecount;
    462 	} */ *uap = v;
    463 
    464 	switch (SCARG(uap, func)) {
    465 #ifdef USER_LDT
    466 	case 0:
    467 		return (linux_read_ldt(l, uap, retval));
    468 
    469 	case 1:
    470 		return (linux_write_ldt(l, uap, retval));
    471 #endif /* USER_LDT */
    472 
    473 	default:
    474 		return (ENOSYS);
    475 	}
    476 }
    477 
    478 /*
    479  * XXX Pathetic hack to make svgalib work. This will fake the major
    480  * device number of an opened VT so that svgalib likes it. grmbl.
    481  * Should probably do it 'wrong the right way' and use a mapping
    482  * array for all major device numbers, and map linux_mknod too.
    483  */
/*
 * Translate a device number for presentation to a Linux process.
 * With wsdisplay configured, a device whose major is
 * NETBSD_WSCONS_MAJOR is reported with LINUX_CONS_MAJOR and its minor
 * number plus one; every other device number is returned unchanged.
 */
dev_t
linux_fakedev(dev)
	dev_t dev;
{
#if (NWSDISPLAY > 0)
	if (major(dev) != NETBSD_WSCONS_MAJOR)
		return dev;
	return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
#else
	return dev;
#endif
}
    494 
    495 #if (NWSDISPLAY > 0)
    496 /*
    497  * That's not complete, but enough to get an X server running.
    498  */
/*
 * Number of entries in each key map below; the LINUX_KDGKBENT handler
 * rejects any kb_index that is not smaller than this.
 */
#define NR_KEYS 128
/*
 * Static key maps returned by the LINUX_KDGKBENT ioctl, one u_short
 * per key index.
 * NOTE(review): the values look like Linux keysyms ((type << 8) | value,
 * e.g. 0x0031 is '1' and 0x0b71 a letter-type 'q') -- confirm against
 * the Linux keyboard headers.
 */
/* Map used for table 0 (no modifier). */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
/* Map used for table 1 (presumably Shift, going by the name). */
}, shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
/* Map used for tables 2 and 3 (presumably AltGr, going by the name). */
}, altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
/* Map used for table 4 (presumably Control, going by the name). */
}, ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};
    569 
/*
 * Key maps selectable through kb_table in the LINUX_KDGKBENT ioctl;
 * that handler bounds kb_table by the number of entries in this array.
 * altgr_map appears twice, so tables 2 and 3 return the same values.
 * NOTE(review): table 3 is presumably a stand-in for a Shift+AltGr map
 * that is not provided here -- confirm against the Linux key map order.
 */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
    573 #endif
    574 
    575 static struct biosdisk_info *
    576 fd2biosinfo(p, fp)
    577 	struct proc *p;
    578 	struct file *fp;
    579 {
    580 	struct vnode *vp;
    581 	const char *blkname;
    582 	char diskname[16];
    583 	int i;
    584 	struct nativedisk_info *nip;
    585 	struct disklist *dl = i386_alldisks;
    586 
    587 	if (fp->f_type != DTYPE_VNODE)
    588 		return NULL;
    589 	vp = (struct vnode *)fp->f_data;
    590 
    591 	if (vp->v_type != VBLK)
    592 		return NULL;
    593 
    594 	blkname = findblkname(major(vp->v_rdev));
    595 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    596 	    DISKUNIT(vp->v_rdev));
    597 
    598 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    599 		nip = &dl->dl_nativedisks[i];
    600 		if (strcmp(diskname, nip->ni_devname))
    601 			continue;
    602 		if (nip->ni_nmatches != 0)
    603 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    604 	}
    605 
    606 	return NULL;
    607 }
    608 
    609 
    610 /*
    611  * We come here in a last attempt to satisfy a Linux ioctl() call
    612  */
    613 int
    614 linux_machdepioctl(p, v, retval)
    615 	struct proc *p;
    616 	void *v;
    617 	register_t *retval;
    618 {
    619 	struct linux_sys_ioctl_args /* {
    620 		syscallarg(int) fd;
    621 		syscallarg(u_long) com;
    622 		syscallarg(caddr_t) data;
    623 	} */ *uap = v;
    624 	struct sys_ioctl_args bia;
    625 	u_long com;
    626 	int error, error1;
    627 #if (NWSDISPLAY > 0)
    628 	struct vt_mode lvt;
    629 	caddr_t bvtp, sg;
    630 	struct kbentry kbe;
    631 #endif
    632 	struct linux_hd_geometry hdg;
    633 	struct linux_hd_big_geometry hdg_big;
    634 	struct biosdisk_info *bip;
    635 	struct filedesc *fdp;
    636 	struct file *fp;
    637 	int fd;
    638 	struct disklabel label, *labp;
    639 	struct partinfo partp;
    640 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
    641 	u_long start, biostotal, realtotal;
    642 	u_char heads, sectors;
    643 	u_int cylinders;
    644 	struct ioctl_pt pt;
    645 
    646 	fd = SCARG(uap, fd);
    647 	SCARG(&bia, fd) = fd;
    648 	SCARG(&bia, data) = SCARG(uap, data);
    649 	com = SCARG(uap, com);
    650 
    651 	fdp = p->p_fd;
    652 
    653 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    654 		return (EBADF);
    655 
    656 	switch (com) {
    657 #if (NWSDISPLAY > 0)
    658 	case LINUX_KDGKBMODE:
    659 		com = KDGKBMODE;
    660 		break;
    661 	case LINUX_KDSKBMODE:
    662 		com = KDSKBMODE;
    663 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    664 			SCARG(&bia, data) = (caddr_t)K_RAW;
    665 		break;
    666 	case LINUX_KIOCSOUND:
    667 		SCARG(&bia, data) =
    668 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    669 		/* fall through */
    670 	case LINUX_KDMKTONE:
    671 		com = KDMKTONE;
    672 		break;
    673 	case LINUX_KDSETMODE:
    674 		com = KDSETMODE;
    675 		break;
    676 	case LINUX_KDGETMODE:
    677 		/* KD_* values are equal to the wscons numbers */
    678 		com = WSDISPLAYIO_GMODE;
    679 		break;
    680 	case LINUX_KDENABIO:
    681 		com = KDENABIO;
    682 		break;
    683 	case LINUX_KDDISABIO:
    684 		com = KDDISABIO;
    685 		break;
    686 	case LINUX_KDGETLED:
    687 		com = KDGETLED;
    688 		break;
    689 	case LINUX_KDSETLED:
    690 		com = KDSETLED;
    691 		break;
    692 	case LINUX_VT_OPENQRY:
    693 		com = VT_OPENQRY;
    694 		break;
    695 	case LINUX_VT_GETMODE:
    696 		SCARG(&bia, com) = VT_GETMODE;
    697 		/* XXX NJWLWP */
    698 		if ((error = sys_ioctl(curproc, &bia, retval)))
    699 			return error;
    700 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    701 		    sizeof (struct vt_mode))))
    702 			return error;
    703 		lvt.relsig = native_to_linux_sig[lvt.relsig];
    704 		lvt.acqsig = native_to_linux_sig[lvt.acqsig];
    705 		lvt.frsig = native_to_linux_sig[lvt.frsig];
    706 		return copyout((caddr_t)&lvt, SCARG(uap, data),
    707 		    sizeof (struct vt_mode));
    708 	case LINUX_VT_SETMODE:
    709 		com = VT_SETMODE;
    710 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    711 		    sizeof (struct vt_mode))))
    712 			return error;
    713 		lvt.relsig = linux_to_native_sig[lvt.relsig];
    714 		lvt.acqsig = linux_to_native_sig[lvt.acqsig];
    715 		lvt.frsig = linux_to_native_sig[lvt.frsig];
    716 		sg = stackgap_init(p->p_emul);
    717 		bvtp = stackgap_alloc(&sg, sizeof (struct vt_mode));
    718 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    719 			return error;
    720 		SCARG(&bia, data) = bvtp;
    721 		break;
    722 	case LINUX_VT_DISALLOCATE:
    723 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    724 		return 0;
    725 	case LINUX_VT_RELDISP:
    726 		com = VT_RELDISP;
    727 		break;
    728 	case LINUX_VT_ACTIVATE:
    729 		com = VT_ACTIVATE;
    730 		break;
    731 	case LINUX_VT_WAITACTIVE:
    732 		com = VT_WAITACTIVE;
    733 		break;
    734 	case LINUX_VT_GETSTATE:
    735 		com = VT_GETSTATE;
    736 		break;
    737 	case LINUX_KDGKBTYPE:
    738 		/* This is what Linux does. */
    739 		return (subyte(SCARG(uap, data), KB_101));
    740 	case LINUX_KDGKBENT:
    741 		/*
    742 		 * The Linux KDGKBENT ioctl is different from the
    743 		 * SYSV original. So we handle it in machdep code.
    744 		 * XXX We should use keyboard mapping information
    745 		 * from wsdisplay, but this would be expensive.
    746 		 */
    747 		if ((error = copyin(SCARG(uap, data), &kbe,
    748 				    sizeof(struct kbentry))))
    749 			return (error);
    750 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    751 		    || kbe.kb_index >= NR_KEYS)
    752 			return (EINVAL);
    753 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    754 		return (copyout(&kbe, SCARG(uap, data),
    755 				sizeof(struct kbentry)));
    756 #endif
    757 	case LINUX_HDIO_GETGEO:
    758 	case LINUX_HDIO_GETGEO_BIG:
    759 		/*
    760 		 * Try to mimic Linux behaviour: return the BIOS geometry
    761 		 * if possible (extending its # of cylinders if it's beyond
    762 		 * the 1023 limit), fall back to the MI geometry (i.e.
    763 		 * the real geometry) if not found, by returning an
    764 		 * error. See common/linux_hdio.c
    765 		 */
    766 		FILE_USE(fp);
    767 		bip = fd2biosinfo(p, fp);
    768 		ioctlf = fp->f_ops->fo_ioctl;
    769 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
    770 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
    771 		FILE_UNUSE(fp, p);
    772 		if (error != 0 && error1 != 0)
    773 			return error1;
    774 		labp = error != 0 ? &label : partp.disklab;
    775 		start = error1 != 0 ? partp.part->p_offset : 0;
    776 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    777 		    && bip->bi_cyl != 0) {
    778 			heads = bip->bi_head;
    779 			sectors = bip->bi_sec;
    780 			cylinders = bip->bi_cyl;
    781 			biostotal = heads * sectors * cylinders;
    782 			realtotal = labp->d_ntracks * labp->d_nsectors *
    783 			    labp->d_ncylinders;
    784 			if (realtotal > biostotal)
    785 				cylinders = realtotal / (heads * sectors);
    786 		} else {
    787 			heads = labp->d_ntracks;
    788 			cylinders = labp->d_ncylinders;
    789 			sectors = labp->d_nsectors;
    790 		}
    791 		if (com == LINUX_HDIO_GETGEO) {
    792 			hdg.start = start;
    793 			hdg.heads = heads;
    794 			hdg.cylinders = cylinders;
    795 			hdg.sectors = sectors;
    796 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
    797 		} else {
    798 			hdg_big.start = start;
    799 			hdg_big.heads = heads;
    800 			hdg_big.cylinders = cylinders;
    801 			hdg_big.sectors = sectors;
    802 			return copyout(&hdg_big, SCARG(uap, data),
    803 			    sizeof hdg_big);
    804 		}
    805 		return 0;
    806 
    807 	default:
    808 		/*
    809 		 * Unknown to us. If it's on a device, just pass it through
    810 		 * using PTIOCLINUX, the device itself might be able to
    811 		 * make some sense of it.
    812 		 * XXX hack: if the function returns EJUSTRETURN,
    813 		 * it has stuffed a sysctl return value in pt.data.
    814 		 */
    815 		FILE_USE(fp);
    816 		ioctlf = fp->f_ops->fo_ioctl;
    817 		pt.com = SCARG(uap, com);
    818 		pt.data = SCARG(uap, data);
    819 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
    820 		FILE_UNUSE(fp, p);
    821 		if (error == EJUSTRETURN) {
    822 			retval[0] = (register_t)pt.data;
    823 			error = 0;
    824 		}
    825 
    826 		if (error == ENOTTY)
    827 			printf("linux_machdepioctl: invalid ioctl %08lx\n",
    828 			    com);
    829 		return error;
    830 	}
    831 	SCARG(&bia, com) = com;
    832 	/* XXX NJWLWP */
    833 	return sys_ioctl(curproc, &bia, retval);
    834 }
    835 
    836 /*
    837  * Set I/O permissions for a process. Just set the maximum level
    838  * right away (ignoring the argument), otherwise we would have
    839  * to rely on I/O permission maps, which are not implemented.
    840  */
    841 int
    842 linux_sys_iopl(l, v, retval)
    843 	struct lwp *l;
    844 	void *v;
    845 	register_t *retval;
    846 {
    847 #if 0
    848 	struct linux_sys_iopl_args /* {
    849 		syscallarg(int) level;
    850 	} */ *uap = v;
    851 #endif
    852 	struct proc *p = l->l_proc;
    853 	struct trapframe *fp = l->l_md.md_regs;
    854 
    855 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    856 		return EPERM;
    857 	fp->tf_eflags |= PSL_IOPL;
    858 	*retval = 0;
    859 	return 0;
    860 }
    861 
    862 /*
    863  * See above. If a root process tries to set access to an I/O port,
    864  * just let it have the whole range.
    865  */
    866 int
    867 linux_sys_ioperm(l, v, retval)
    868 	struct lwp *l;
    869 	void *v;
    870 	register_t *retval;
    871 {
    872 	struct linux_sys_ioperm_args /* {
    873 		syscallarg(unsigned int) lo;
    874 		syscallarg(unsigned int) hi;
    875 		syscallarg(int) val;
    876 	} */ *uap = v;
    877 	struct proc *p = l->l_proc;
    878 	struct trapframe *fp = l->l_md.md_regs;
    879 
    880 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    881 		return EPERM;
    882 	if (SCARG(uap, val))
    883 		fp->tf_eflags |= PSL_IOPL;
    884 	*retval = 0;
    885 	return 0;
    886 }
    887