Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.62.2.4
      1 /*	$NetBSD: linux_machdep.c,v 1.62.2.4 2001/11/14 19:13:03 nathanw Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.62.2.4 2001/11/14 19:13:03 nathanw Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/map.h>
     52 #include <sys/lwp.h>
     53 #include <sys/proc.h>
     54 #include <sys/user.h>
     55 #include <sys/buf.h>
     56 #include <sys/reboot.h>
     57 #include <sys/conf.h>
     58 #include <sys/exec.h>
     59 #include <sys/file.h>
     60 #include <sys/callout.h>
     61 #include <sys/malloc.h>
     62 #include <sys/mbuf.h>
     63 #include <sys/msgbuf.h>
     64 #include <sys/mount.h>
     65 #include <sys/vnode.h>
     66 #include <sys/device.h>
     67 #include <sys/syscallargs.h>
     68 #include <sys/filedesc.h>
     69 #include <sys/exec_elf.h>
     70 #include <sys/disklabel.h>
     71 #include <sys/ioctl.h>
     72 #include <miscfs/specfs/specdev.h>
     73 
     74 #include <compat/linux/common/linux_types.h>
     75 #include <compat/linux/common/linux_signal.h>
     76 #include <compat/linux/common/linux_util.h>
     77 #include <compat/linux/common/linux_ioctl.h>
     78 #include <compat/linux/common/linux_hdio.h>
     79 #include <compat/linux/common/linux_exec.h>
     80 #include <compat/linux/common/linux_machdep.h>
     81 
     82 #include <compat/linux/linux_syscallargs.h>
     83 
     84 #include <machine/cpu.h>
     85 #include <machine/cpufunc.h>
     86 #include <machine/psl.h>
     87 #include <machine/reg.h>
     88 #include <machine/segments.h>
     89 #include <machine/specialreg.h>
     90 #include <machine/sysarch.h>
     91 #include <machine/vm86.h>
     92 #include <machine/vmparam.h>
     93 
     94 /*
     95  * To see whether wscons is configured (for virtual console ioctl calls).
     96  */
     97 #if defined(_KERNEL_OPT)
     98 #include "wsdisplay.h"
     99 #endif
    100 #if (NWSDISPLAY > 0)
    101 #include <dev/wscons/wsconsio.h>
    102 #include <dev/wscons/wsdisplay_usl_io.h>
    103 #if defined(_KERNEL_OPT)
    104 #include "opt_xserver.h"
    105 #endif
    106 #endif
    107 
    108 #ifdef USER_LDT
    109 #include <machine/cpu.h>
    110 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    111     register_t *));
    112 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    113     register_t *));
    114 #endif
    115 
    116 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    117 extern struct disklist *i386_alldisks;
    118 extern const char *findblkname __P((int));
    119 
    120 /*
    121  * Deal with some i386-specific things in the Linux emulation code.
    122  */
    123 
    124 void
    125 linux_setregs(l, epp, stack)
    126 	struct lwp *l;
    127 	struct exec_package *epp;
    128 	u_long stack;
    129 {
    130 	struct pcb *pcb = &l->l_addr->u_pcb;
    131 
    132 	setregs(l, epp, stack);
    133 	if (i386_use_fxsave)
    134 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    135 	else
    136 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    137 }
    138 
    139 /*
    140  * Send an interrupt to process.
    141  *
    142  * Stack is set up to allow sigcode stored
    143  * in u. to call routine, followed by kcall
    144  * to sigreturn routine below.  After sigreturn
    145  * resets the signal mask, the stack, and the
    146  * frame pointer, it returns to the user
    147  * specified pc, psl.
    148  */
    149 
    150 void
    151 linux_sendsig(catcher, sig, mask, code)
    152 	sig_t catcher;
    153 	int sig;
    154 	sigset_t *mask;
    155 	u_long code;
    156 {
    157 	struct lwp *l = curproc;
    158 	struct proc *p = l->l_proc;
    159 	struct trapframe *tf;
    160 	struct linux_sigframe *fp, frame;
    161 	int onstack;
    162 
    163 	tf = l->l_md.md_regs;
    164 	/* Do we need to jump onto the signal stack? */
    165 	onstack =
    166 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    167 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    168 
    169 	/* Allocate space for the signal handler context. */
    170 	if (onstack)
    171 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
    172 					  p->p_sigctx.ps_sigstk.ss_size);
    173 	else
    174 		fp = (struct linux_sigframe *)tf->tf_esp;
    175 	fp--;
    176 
    177 	/* Build stack frame for signal trampoline. */
    178 	frame.sf_handler = catcher;
    179 	frame.sf_sig = native_to_linux_sig[sig];
    180 
    181 	/* Save register context. */
    182 #ifdef VM86
    183 	if (tf->tf_eflags & PSL_VM) {
    184 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
    185 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
    186 		frame.sf_sc.sc_es = tf->tf_vm86_es;
    187 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
    188 		frame.sf_sc.sc_eflags = get_vflags(l);
    189 	} else
    190 #endif
    191 	{
    192 		frame.sf_sc.sc_gs = tf->tf_gs;
    193 		frame.sf_sc.sc_fs = tf->tf_fs;
    194 		frame.sf_sc.sc_es = tf->tf_es;
    195 		frame.sf_sc.sc_ds = tf->tf_ds;
    196 		frame.sf_sc.sc_eflags = tf->tf_eflags;
    197 	}
    198 	frame.sf_sc.sc_edi = tf->tf_edi;
    199 	frame.sf_sc.sc_esi = tf->tf_esi;
    200 	frame.sf_sc.sc_ebp = tf->tf_ebp;
    201 	frame.sf_sc.sc_ebx = tf->tf_ebx;
    202 	frame.sf_sc.sc_edx = tf->tf_edx;
    203 	frame.sf_sc.sc_ecx = tf->tf_ecx;
    204 	frame.sf_sc.sc_eax = tf->tf_eax;
    205 	frame.sf_sc.sc_eip = tf->tf_eip;
    206 	frame.sf_sc.sc_cs = tf->tf_cs;
    207 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
    208 	frame.sf_sc.sc_ss = tf->tf_ss;
    209 	frame.sf_sc.sc_err = tf->tf_err;
    210 	frame.sf_sc.sc_trapno = tf->tf_trapno;
    211 
    212 	/* Save signal stack. */
    213 	/* Linux doesn't save the onstack flag in sigframe */
    214 
    215 	/* Save signal mask. */
    216 	native_to_linux_old_sigset(mask, &frame.sf_sc.sc_mask);
    217 
    218 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    219 		/*
    220 		 * Process has trashed its stack; give it an illegal
    221 		 * instruction to halt it in its tracks.
    222 		 */
    223 		sigexit(l, SIGILL);
    224 		/* NOTREACHED */
    225 	}
    226 
    227 	/*
    228 	 * Build context to run handler in.
    229 	 */
    230 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    231 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    232 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    233 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    234 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    235 	tf->tf_esp = (int)fp;
    236 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    237 
    238 	/* Remember that we're now on the signal stack. */
    239 	if (onstack)
    240 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    241 }
    242 
    243 /*
    244  * System call to cleanup state after a signal
    245  * has been taken.  Reset signal mask and
    246  * stack state from context left by sendsig (above).
    247  * Return to previous pc and psl as specified by
    248  * context left by sendsig. Check carefully to
    249  * make sure that the user has not modified the
    250  * psl to gain improper privileges or to cause
    251  * a machine fault.
    252  */
    253 int
    254 linux_sys_rt_sigreturn(l, v, retval)
    255 	struct lwp *l;
    256 	void *v;
    257 	register_t *retval;
    258 {
    259 	/* XXX XAX write me */
    260 	return(ENOSYS);
    261 }
    262 
    263 int
    264 linux_sys_sigreturn(l, v, retval)
    265 	struct lwp *l;
    266 	void *v;
    267 	register_t *retval;
    268 {
    269 	struct linux_sys_sigreturn_args /* {
    270 		syscallarg(struct linux_sigcontext *) scp;
    271 	} */ *uap = v;
    272 	struct proc *p = l->l_proc;
    273 	struct linux_sigcontext *scp, context;
    274 	struct trapframe *tf;
    275 	sigset_t mask;
    276 	ssize_t ss_gap;
    277 
    278 	/*
    279 	 * The trampoline code hands us the context.
    280 	 * It is unsafe to keep track of it ourselves, in the event that a
    281 	 * program jumps out of a signal handler.
    282 	 */
    283 	scp = SCARG(uap, scp);
    284 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
    285 		return (EFAULT);
    286 
    287 	/* Restore register context. */
    288 	tf = l->l_md.md_regs;
    289 #ifdef VM86
    290 	if (context.sc_eflags & PSL_VM) {
    291 		tf->tf_vm86_gs = context.sc_gs;
    292 		tf->tf_vm86_fs = context.sc_fs;
    293 		tf->tf_vm86_es = context.sc_es;
    294 		tf->tf_vm86_ds = context.sc_ds;
    295 		set_vflags(l, context.sc_eflags);
    296 	} else
    297 #endif
    298 	{
    299 		/*
    300 		 * Check for security violations.  If we're returning to
    301 		 * protected mode, the CPU will validate the segment registers
    302 		 * automatically and generate a trap on violations.  We handle
    303 		 * the trap, rather than doing all of the checking here.
    304 		 */
    305 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    306 		    !USERMODE(context.sc_cs, context.sc_eflags))
    307 			return (EINVAL);
    308 
    309 		/* %fs and %gs were restored by the trampoline. */
    310 		tf->tf_es = context.sc_es;
    311 		tf->tf_ds = context.sc_ds;
    312 		tf->tf_eflags = context.sc_eflags;
    313 	}
    314 	tf->tf_edi = context.sc_edi;
    315 	tf->tf_esi = context.sc_esi;
    316 	tf->tf_ebp = context.sc_ebp;
    317 	tf->tf_ebx = context.sc_ebx;
    318 	tf->tf_edx = context.sc_edx;
    319 	tf->tf_ecx = context.sc_ecx;
    320 	tf->tf_eax = context.sc_eax;
    321 	tf->tf_eip = context.sc_eip;
    322 	tf->tf_cs = context.sc_cs;
    323 	tf->tf_esp = context.sc_esp_at_signal;
    324 	tf->tf_ss = context.sc_ss;
    325 
    326 	/* Restore signal stack. */
    327 	/*
    328 	 * Linux really does it this way; it doesn't have space in sigframe
    329 	 * to save the onstack flag.
    330 	 */
    331 	ss_gap = (ssize_t)
    332 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
    333 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
    334 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    335 	else
    336 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
    337 
    338 	/* Restore signal mask. */
    339 	linux_old_to_native_sigset(&context.sc_mask, &mask);
    340 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    341 
    342 	return (EJUSTRETURN);
    343 }
    344 
    345 #ifdef USER_LDT
    346 
    347 int
    348 linux_read_ldt(l, uap, retval)
    349 	struct lwp *l;
    350 	struct linux_sys_modify_ldt_args /* {
    351 		syscallarg(int) func;
    352 		syscallarg(void *) ptr;
    353 		syscallarg(size_t) bytecount;
    354 	} */ *uap;
    355 	register_t *retval;
    356 {
    357 	struct proc *p = l->l_proc;
    358 	struct i386_get_ldt_args gl;
    359 	int error;
    360 	caddr_t sg;
    361 	char *parms;
    362 
    363 	sg = stackgap_init(p->p_emul);
    364 
    365 	gl.start = 0;
    366 	gl.desc = SCARG(uap, ptr);
    367 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    368 
    369 	parms = stackgap_alloc(&sg, sizeof(gl));
    370 
    371 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    372 		return (error);
    373 
    374 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    375 		return (error);
    376 
    377 	*retval *= sizeof(union descriptor);
    378 	return (0);
    379 }
    380 
    381 struct linux_ldt_info {
    382 	u_int entry_number;
    383 	u_long base_addr;
    384 	u_int limit;
    385 	u_int seg_32bit:1;
    386 	u_int contents:2;
    387 	u_int read_exec_only:1;
    388 	u_int limit_in_pages:1;
    389 	u_int seg_not_present:1;
    390 };
    391 
    392 int
    393 linux_write_ldt(l, uap, retval)
    394 	struct lwp *l;
    395 	struct linux_sys_modify_ldt_args /* {
    396 		syscallarg(int) func;
    397 		syscallarg(void *) ptr;
    398 		syscallarg(size_t) bytecount;
    399 	} */ *uap;
    400 	register_t *retval;
    401 {
    402 	struct proc *p = l->l_proc;
    403 	struct linux_ldt_info ldt_info;
    404 	struct segment_descriptor sd;
    405 	struct i386_set_ldt_args sl;
    406 	int error;
    407 	caddr_t sg;
    408 	char *parms;
    409 
    410 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    411 		return (EINVAL);
    412 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    413 		return error;
    414 	if (ldt_info.contents == 3)
    415 		return (EINVAL);
    416 
    417 	sg = stackgap_init(p->p_emul);
    418 
    419 	sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    420 	sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    421 	sd.sd_lolimit = ldt_info.limit & 0xffff;
    422 	sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    423 	sd.sd_type =
    424 	    16 | (ldt_info.contents << 2) | (!ldt_info.read_exec_only << 1);
    425 	sd.sd_dpl = SEL_UPL;
    426 	sd.sd_p = !ldt_info.seg_not_present;
    427 	sd.sd_def32 = ldt_info.seg_32bit;
    428 	sd.sd_gran = ldt_info.limit_in_pages;
    429 
    430 	sl.start = ldt_info.entry_number;
    431 	sl.desc = stackgap_alloc(&sg, sizeof(sd));
    432 	sl.num = 1;
    433 
    434 #if 0
    435 	printf("linux_write_ldt: idx=%d, base=%x, limit=%x\n",
    436 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit);
    437 #endif
    438 
    439 	parms = stackgap_alloc(&sg, sizeof(sl));
    440 
    441 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    442 		return (error);
    443 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    444 		return (error);
    445 
    446 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    447 		return (error);
    448 
    449 	*retval = 0;
    450 	return (0);
    451 }
    452 
    453 #endif /* USER_LDT */
    454 
    455 int
    456 linux_sys_modify_ldt(l, v, retval)
    457 	struct lwp *l;
    458 	void *v;
    459 	register_t *retval;
    460 {
    461 	struct linux_sys_modify_ldt_args /* {
    462 		syscallarg(int) func;
    463 		syscallarg(void *) ptr;
    464 		syscallarg(size_t) bytecount;
    465 	} */ *uap = v;
    466 
    467 	switch (SCARG(uap, func)) {
    468 #ifdef USER_LDT
    469 	case 0:
    470 		return (linux_read_ldt(l, uap, retval));
    471 
    472 	case 1:
    473 		return (linux_write_ldt(l, uap, retval));
    474 #endif /* USER_LDT */
    475 
    476 	default:
    477 		return (ENOSYS);
    478 	}
    479 }
    480 
/*
 * XXX Hack for svgalib: report an opened wscons VT under the Linux
 * console major number, with the minor shifted up by one, so that
 * svgalib accepts it.  Any other device number is returned untouched,
 * as is every device number when wsdisplay is not configured.
 * A proper solution would map all major numbers through a table and
 * cover linux_mknod as well.
 */
dev_t
linux_fakedev(dev)
	dev_t dev;
{
	dev_t faked = dev;

#if (NWSDISPLAY > 0)
	if (major(dev) == NETBSD_WSCONS_MAJOR)
		faked = makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
#endif
	return faked;
}
    497 
    498 #if (NWSDISPLAY > 0)
    499 /*
    500  * That's not complete, but enough to get an X server running.
    501  */
/*
 * Fixed key translation tables used to answer the LINUX_KDGKBENT ioctl:
 * the handler returns linux_keytabs[kb_table][kb_index] as kb_value.
 * Every map holds NR_KEYS entries, one per key index.
 * NOTE(review): the entries look like Linux keysym codes (type in the
 * high byte, value in the low byte) -- confirm against the Linux
 * keyboard headers before editing individual values.
 */
    502 #define NR_KEYS 128
    503 static const u_short plain_map[NR_KEYS] = {
    504 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
    505 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
    506 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    507 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
    508 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
    509 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
    510 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
    511 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    512 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
    513 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    514 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
    515 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    516 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    517 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    518 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    519 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    520 }, shift_map[NR_KEYS] = {
    521 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
    522 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
    523 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
    524 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
    525 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
    526 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
    527 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
    528 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
    529 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
    530 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    531 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
    532 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    533 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    534 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
    535 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    536 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    537 }, altgr_map[NR_KEYS] = {
    538 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
    539 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
    540 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    541 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
    542 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
    543 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
    544 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
    545 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
    546 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
    547 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
    548 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
    549 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    550 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    551 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    552 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    553 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    554 }, ctrl_map[NR_KEYS] = {
    555 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
    556 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
    557 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
    558 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
    559 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
    560 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
    561 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
    562 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    563 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
    564 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    565 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
    566 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    567 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    568 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    569 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    570 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    571 };
    572 
/*
 * Table selector: kb_table 0..4 picks plain, shift, altgr, altgr, ctrl.
 * NOTE(review): slot 3 reuses altgr_map; presumably it stands for a
 * shift+altgr table that has no dedicated map here -- confirm.
 */
    573 const u_short * const linux_keytabs[] = {
    574 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
    575 };
    576 #endif
    577 
    578 static struct biosdisk_info *
    579 fd2biosinfo(p, fp)
    580 	struct proc *p;
    581 	struct file *fp;
    582 {
    583 	struct vnode *vp;
    584 	const char *blkname;
    585 	char diskname[16];
    586 	int i;
    587 	struct nativedisk_info *nip;
    588 	struct disklist *dl = i386_alldisks;
    589 
    590 	if (fp->f_type != DTYPE_VNODE)
    591 		return NULL;
    592 	vp = (struct vnode *)fp->f_data;
    593 
    594 	if (vp->v_type != VBLK)
    595 		return NULL;
    596 
    597 	blkname = findblkname(major(vp->v_rdev));
    598 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    599 	    DISKUNIT(vp->v_rdev));
    600 
    601 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    602 		nip = &dl->dl_nativedisks[i];
    603 		if (strcmp(diskname, nip->ni_devname))
    604 			continue;
    605 		if (nip->ni_nmatches != 0)
    606 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    607 	}
    608 
    609 	return NULL;
    610 }
    611 
    612 
    613 /*
    614  * We come here in a last attempt to satisfy a Linux ioctl() call
    615  */
    616 int
    617 linux_machdepioctl(p, v, retval)
    618 	struct proc *p;
    619 	void *v;
    620 	register_t *retval;
    621 {
    622 	struct linux_sys_ioctl_args /* {
    623 		syscallarg(int) fd;
    624 		syscallarg(u_long) com;
    625 		syscallarg(caddr_t) data;
    626 	} */ *uap = v;
    627 	struct sys_ioctl_args bia;
    628 	u_long com;
    629 	int error, error1;
    630 #if (NWSDISPLAY > 0)
    631 	struct vt_mode lvt;
    632 	caddr_t bvtp, sg;
    633 	struct kbentry kbe;
    634 #endif
    635 	struct linux_hd_geometry hdg;
    636 	struct linux_hd_big_geometry hdg_big;
    637 	struct biosdisk_info *bip;
    638 	struct filedesc *fdp;
    639 	struct file *fp;
    640 	int fd;
    641 	struct disklabel label, *labp;
    642 	struct partinfo partp;
    643 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
    644 	u_long start, biostotal, realtotal;
    645 	u_char heads, sectors;
    646 	u_int cylinders;
    647 	struct ioctl_pt pt;
    648 
    649 	fd = SCARG(uap, fd);
    650 	SCARG(&bia, fd) = fd;
    651 	SCARG(&bia, data) = SCARG(uap, data);
    652 	com = SCARG(uap, com);
    653 
    654 	fdp = p->p_fd;
    655 
    656 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    657 		return (EBADF);
    658 
    659 	switch (com) {
    660 #if (NWSDISPLAY > 0)
    661 	case LINUX_KDGKBMODE:
    662 		com = KDGKBMODE;
    663 		break;
    664 	case LINUX_KDSKBMODE:
    665 		com = KDSKBMODE;
    666 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    667 			SCARG(&bia, data) = (caddr_t)K_RAW;
    668 		break;
    669 	case LINUX_KIOCSOUND:
    670 		SCARG(&bia, data) =
    671 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    672 		/* fall through */
    673 	case LINUX_KDMKTONE:
    674 		com = KDMKTONE;
    675 		break;
    676 	case LINUX_KDSETMODE:
    677 		com = KDSETMODE;
    678 		break;
    679 	case LINUX_KDGETMODE:
    680 		/* KD_* values are equal to the wscons numbers */
    681 		com = WSDISPLAYIO_GMODE;
    682 		break;
    683 	case LINUX_KDENABIO:
    684 		com = KDENABIO;
    685 		break;
    686 	case LINUX_KDDISABIO:
    687 		com = KDDISABIO;
    688 		break;
    689 	case LINUX_KDGETLED:
    690 		com = KDGETLED;
    691 		break;
    692 	case LINUX_KDSETLED:
    693 		com = KDSETLED;
    694 		break;
    695 	case LINUX_VT_OPENQRY:
    696 		com = VT_OPENQRY;
    697 		break;
    698 	case LINUX_VT_GETMODE:
    699 		SCARG(&bia, com) = VT_GETMODE;
    700 		/* XXX NJWLWP */
    701 		if ((error = sys_ioctl(curproc, &bia, retval)))
    702 			return error;
    703 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    704 		    sizeof (struct vt_mode))))
    705 			return error;
    706 		lvt.relsig = native_to_linux_sig[lvt.relsig];
    707 		lvt.acqsig = native_to_linux_sig[lvt.acqsig];
    708 		lvt.frsig = native_to_linux_sig[lvt.frsig];
    709 		return copyout((caddr_t)&lvt, SCARG(uap, data),
    710 		    sizeof (struct vt_mode));
    711 	case LINUX_VT_SETMODE:
    712 		com = VT_SETMODE;
    713 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    714 		    sizeof (struct vt_mode))))
    715 			return error;
    716 		lvt.relsig = linux_to_native_sig[lvt.relsig];
    717 		lvt.acqsig = linux_to_native_sig[lvt.acqsig];
    718 		lvt.frsig = linux_to_native_sig[lvt.frsig];
    719 		sg = stackgap_init(p->p_emul);
    720 		bvtp = stackgap_alloc(&sg, sizeof (struct vt_mode));
    721 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    722 			return error;
    723 		SCARG(&bia, data) = bvtp;
    724 		break;
    725 	case LINUX_VT_DISALLOCATE:
    726 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    727 		return 0;
    728 	case LINUX_VT_RELDISP:
    729 		com = VT_RELDISP;
    730 		break;
    731 	case LINUX_VT_ACTIVATE:
    732 		com = VT_ACTIVATE;
    733 		break;
    734 	case LINUX_VT_WAITACTIVE:
    735 		com = VT_WAITACTIVE;
    736 		break;
    737 	case LINUX_VT_GETSTATE:
    738 		com = VT_GETSTATE;
    739 		break;
    740 	case LINUX_KDGKBTYPE:
    741 		/* This is what Linux does. */
    742 		return (subyte(SCARG(uap, data), KB_101));
    743 	case LINUX_KDGKBENT:
    744 		/*
    745 		 * The Linux KDGKBENT ioctl is different from the
    746 		 * SYSV original. So we handle it in machdep code.
    747 		 * XXX We should use keyboard mapping information
    748 		 * from wsdisplay, but this would be expensive.
    749 		 */
    750 		if ((error = copyin(SCARG(uap, data), &kbe,
    751 				    sizeof(struct kbentry))))
    752 			return (error);
    753 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    754 		    || kbe.kb_index >= NR_KEYS)
    755 			return (EINVAL);
    756 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    757 		return (copyout(&kbe, SCARG(uap, data),
    758 				sizeof(struct kbentry)));
    759 #endif
    760 	case LINUX_HDIO_GETGEO:
    761 	case LINUX_HDIO_GETGEO_BIG:
    762 		/*
    763 		 * Try to mimic Linux behaviour: return the BIOS geometry
    764 		 * if possible (extending its # of cylinders if it's beyond
    765 		 * the 1023 limit), fall back to the MI geometry (i.e.
    766 		 * the real geometry) if not found, by returning an
    767 		 * error. See common/linux_hdio.c
    768 		 */
    769 		FILE_USE(fp);
    770 		bip = fd2biosinfo(p, fp);
    771 		ioctlf = fp->f_ops->fo_ioctl;
    772 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
    773 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
    774 		FILE_UNUSE(fp, p);
    775 		if (error != 0 && error1 != 0)
    776 			return error1;
    777 		labp = error != 0 ? &label : partp.disklab;
    778 		start = error1 != 0 ? partp.part->p_offset : 0;
    779 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    780 		    && bip->bi_cyl != 0) {
    781 			heads = bip->bi_head;
    782 			sectors = bip->bi_sec;
    783 			cylinders = bip->bi_cyl;
    784 			biostotal = heads * sectors * cylinders;
    785 			realtotal = labp->d_ntracks * labp->d_nsectors *
    786 			    labp->d_ncylinders;
    787 			if (realtotal > biostotal)
    788 				cylinders = realtotal / (heads * sectors);
    789 		} else {
    790 			heads = labp->d_ntracks;
    791 			cylinders = labp->d_ncylinders;
    792 			sectors = labp->d_nsectors;
    793 		}
    794 		if (com == LINUX_HDIO_GETGEO) {
    795 			hdg.start = start;
    796 			hdg.heads = heads;
    797 			hdg.cylinders = cylinders;
    798 			hdg.sectors = sectors;
    799 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
    800 		} else {
    801 			hdg_big.start = start;
    802 			hdg_big.heads = heads;
    803 			hdg_big.cylinders = cylinders;
    804 			hdg_big.sectors = sectors;
    805 			return copyout(&hdg_big, SCARG(uap, data),
    806 			    sizeof hdg_big);
    807 		}
    808 		return 0;
    809 
    810 	default:
    811 		/*
    812 		 * Unknown to us. If it's on a device, just pass it through
    813 		 * using PTIOCLINUX, the device itself might be able to
    814 		 * make some sense of it.
    815 		 * XXX hack: if the function returns EJUSTRETURN,
    816 		 * it has stuffed a sysctl return value in pt.data.
    817 		 */
    818 		FILE_USE(fp);
    819 		ioctlf = fp->f_ops->fo_ioctl;
    820 		pt.com = SCARG(uap, com);
    821 		pt.data = SCARG(uap, data);
    822 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
    823 		FILE_UNUSE(fp, p);
    824 		if (error == EJUSTRETURN) {
    825 			retval[0] = (register_t)pt.data;
    826 			error = 0;
    827 		}
    828 
    829 		if (error == ENOTTY)
    830 			printf("linux_machdepioctl: invalid ioctl %08lx\n",
    831 			    com);
    832 		return error;
    833 	}
    834 	SCARG(&bia, com) = com;
    835 	/* XXX NJWLWP */
    836 	return sys_ioctl(curproc, &bia, retval);
    837 }
    838 
    839 /*
    840  * Set I/O permissions for a process. Just set the maximum level
    841  * right away (ignoring the argument), otherwise we would have
    842  * to rely on I/O permission maps, which are not implemented.
    843  */
    844 int
    845 linux_sys_iopl(l, v, retval)
    846 	struct lwp *l;
    847 	void *v;
    848 	register_t *retval;
    849 {
    850 #if 0
    851 	struct linux_sys_iopl_args /* {
    852 		syscallarg(int) level;
    853 	} */ *uap = v;
    854 #endif
    855 	struct proc *p = l->l_proc;
    856 	struct trapframe *fp = l->l_md.md_regs;
    857 
    858 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    859 		return EPERM;
    860 	fp->tf_eflags |= PSL_IOPL;
    861 	*retval = 0;
    862 	return 0;
    863 }
    864 
    865 /*
    866  * See above. If a root process tries to set access to an I/O port,
    867  * just let it have the whole range.
    868  */
    869 int
    870 linux_sys_ioperm(l, v, retval)
    871 	struct lwp *l;
    872 	void *v;
    873 	register_t *retval;
    874 {
    875 	struct linux_sys_ioperm_args /* {
    876 		syscallarg(unsigned int) lo;
    877 		syscallarg(unsigned int) hi;
    878 		syscallarg(int) val;
    879 	} */ *uap = v;
    880 	struct proc *p = l->l_proc;
    881 	struct trapframe *fp = l->l_md.md_regs;
    882 
    883 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    884 		return EPERM;
    885 	if (SCARG(uap, val))
    886 		fp->tf_eflags |= PSL_IOPL;
    887 	*retval = 0;
    888 	return 0;
    889 }
    890