Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.80
      1 /*	$NetBSD: linux_machdep.c,v 1.80 2002/09/25 22:21:34 thorpej Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.80 2002/09/25 22:21:34 thorpej Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/syscallargs.h>
     66 #include <sys/filedesc.h>
     67 #include <sys/exec_elf.h>
     68 #include <sys/disklabel.h>
     69 #include <sys/ioctl.h>
     70 #include <miscfs/specfs/specdev.h>
     71 
     72 #include <compat/linux/common/linux_types.h>
     73 #include <compat/linux/common/linux_signal.h>
     74 #include <compat/linux/common/linux_util.h>
     75 #include <compat/linux/common/linux_ioctl.h>
     76 #include <compat/linux/common/linux_hdio.h>
     77 #include <compat/linux/common/linux_exec.h>
     78 #include <compat/linux/common/linux_machdep.h>
     79 
     80 #include <compat/linux/linux_syscallargs.h>
     81 
     82 #include <machine/cpu.h>
     83 #include <machine/cpufunc.h>
     84 #include <machine/psl.h>
     85 #include <machine/reg.h>
     86 #include <machine/segments.h>
     87 #include <machine/specialreg.h>
     88 #include <machine/sysarch.h>
     89 #include <machine/vm86.h>
     90 #include <machine/vmparam.h>
     91 
     92 /*
     93  * To see whether wscons is configured (for virtual console ioctl calls).
     94  */
     95 #if defined(_KERNEL_OPT)
     96 #include "wsdisplay.h"
     97 #endif
     98 #if (NWSDISPLAY > 0)
     99 #include <dev/wscons/wsconsio.h>
    100 #include <dev/wscons/wsdisplay_usl_io.h>
    101 #if defined(_KERNEL_OPT)
    102 #include "opt_xserver.h"
    103 #endif
    104 #endif
    105 
    106 #ifdef USER_LDT
    107 #include <machine/cpu.h>
    108 int linux_read_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
    109     register_t *));
    110 int linux_write_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
    111     register_t *));
    112 #endif
    113 
    114 #ifdef DEBUG_LINUX
    115 #define DPRINTF(a) uprintf a
    116 #else
    117 #define DPRINTF(a)
    118 #endif
    119 
    120 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    121 extern struct disklist *i386_alldisks;
    122 
    123 /*
    124  * Deal with some i386-specific things in the Linux emulation code.
    125  */
    126 
    127 void
    128 linux_setregs(p, epp, stack)
    129 	struct proc *p;
    130 	struct exec_package *epp;
    131 	u_long stack;
    132 {
    133 	struct pcb *pcb = &p->p_addr->u_pcb;
    134 	struct trapframe *tf;
    135 
    136 #if NNPX > 0
    137 	/* If we were using the FPU, forget about it. */
    138 	if (npxproc == p)
    139 		npxdrop();
    140 #endif
    141 
    142 #ifdef USER_LDT
    143 	pmap_ldt_cleanup(p);
    144 #endif
    145 
    146 	p->p_md.md_flags &= ~MDP_USEDFPU;
    147 	pcb->pcb_flags = 0;
    148 
    149 	if (i386_use_fxsave) {
    150 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    151 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    152 	} else
    153 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    154 
    155 	tf = p->p_md.md_regs;
    156 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    157 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    158 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    159 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    160 	tf->tf_edi = 0;
    161 	tf->tf_esi = 0;
    162 	tf->tf_ebp = 0;
    163 	tf->tf_ebx = (int)p->p_psstr;
    164 	tf->tf_edx = 0;
    165 	tf->tf_ecx = 0;
    166 	tf->tf_eax = 0;
    167 	tf->tf_eip = epp->ep_entry;
    168 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    169 	tf->tf_eflags = PSL_USERSET;
    170 	tf->tf_esp = stack;
    171 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    172 }
    173 
    174 /*
    175  * Send an interrupt to process.
    176  *
    177  * Stack is set up to allow sigcode stored
    178  * in u. to call routine, followed by kcall
    179  * to sigreturn routine below.  After sigreturn
    180  * resets the signal mask, the stack, and the
    181  * frame pointer, it returns to the user
    182  * specified pc, psl.
    183  */
    184 
    185 void
    186 linux_sendsig(sig, mask, code)
    187 	int sig;
    188 	sigset_t *mask;
    189 	u_long code;
    190 {
    191 	struct proc *p = curproc;
    192 	struct trapframe *tf;
    193 	struct linux_sigframe *fp, frame;
    194 	int onstack;
    195 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    196 
    197 	tf = p->p_md.md_regs;
    198 
    199 	/* Do we need to jump onto the signal stack? */
    200 	onstack =
    201 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    202 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    203 
    204 	/* Allocate space for the signal handler context. */
    205 	if (onstack)
    206 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
    207 					  p->p_sigctx.ps_sigstk.ss_size);
    208 	else
    209 		fp = (struct linux_sigframe *)tf->tf_esp;
    210 	fp--;
    211 
    212 	/* Build stack frame for signal trampoline. */
    213 	frame.sf_handler = catcher;
    214 	frame.sf_sig = native_to_linux_signo[sig];
    215 
    216 	/* Save register context. */
    217 #ifdef VM86
    218 	if (tf->tf_eflags & PSL_VM) {
    219 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
    220 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
    221 		frame.sf_sc.sc_es = tf->tf_vm86_es;
    222 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
    223 		frame.sf_sc.sc_eflags = get_vflags(p);
    224 	} else
    225 #endif
    226 	{
    227 		frame.sf_sc.sc_gs = tf->tf_gs;
    228 		frame.sf_sc.sc_fs = tf->tf_fs;
    229 		frame.sf_sc.sc_es = tf->tf_es;
    230 		frame.sf_sc.sc_ds = tf->tf_ds;
    231 		frame.sf_sc.sc_eflags = tf->tf_eflags;
    232 	}
    233 	frame.sf_sc.sc_edi = tf->tf_edi;
    234 	frame.sf_sc.sc_esi = tf->tf_esi;
    235 	frame.sf_sc.sc_ebp = tf->tf_ebp;
    236 	frame.sf_sc.sc_ebx = tf->tf_ebx;
    237 	frame.sf_sc.sc_edx = tf->tf_edx;
    238 	frame.sf_sc.sc_ecx = tf->tf_ecx;
    239 	frame.sf_sc.sc_eax = tf->tf_eax;
    240 	frame.sf_sc.sc_eip = tf->tf_eip;
    241 	frame.sf_sc.sc_cs = tf->tf_cs;
    242 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
    243 	frame.sf_sc.sc_ss = tf->tf_ss;
    244 	frame.sf_sc.sc_err = tf->tf_err;
    245 	frame.sf_sc.sc_trapno = tf->tf_trapno;
    246 	frame.sf_sc.sc_cr2 = p->p_addr->u_pcb.pcb_cr2;
    247 
    248 	/* Save signal stack. */
    249 	/* Linux doesn't save the onstack flag in sigframe */
    250 
    251 	/* Save signal mask. */
    252 	native_to_linux_old_sigset(&frame.sf_sc.sc_mask, mask);
    253 
    254 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    255 		/*
    256 		 * Process has trashed its stack; give it an illegal
    257 		 * instruction to halt it in its tracks.
    258 		 */
    259 		sigexit(p, SIGILL);
    260 		/* NOTREACHED */
    261 	}
    262 
    263 	/*
    264 	 * Build context to run handler in.
    265 	 */
    266 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    267 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    268 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    269 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    270 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    271 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    272 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    273 	tf->tf_esp = (int)fp;
    274 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    275 
    276 	/* Remember that we're now on the signal stack. */
    277 	if (onstack)
    278 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    279 }
    280 
    281 /*
    282  * System call to cleanup state after a signal
    283  * has been taken.  Reset signal mask and
    284  * stack state from context left by sendsig (above).
    285  * Return to previous pc and psl as specified by
    286  * context left by sendsig. Check carefully to
    287  * make sure that the user has not modified the
    288  * psl to gain improper privileges or to cause
    289  * a machine fault.
    290  */
    291 int
    292 linux_sys_rt_sigreturn(p, v, retval)
    293 	struct proc *p;
    294 	void *v;
    295 	register_t *retval;
    296 {
    297 	/* XXX XAX write me */
    298 	return(ENOSYS);
    299 }
    300 
    301 int
    302 linux_sys_sigreturn(p, v, retval)
    303 	struct proc *p;
    304 	void *v;
    305 	register_t *retval;
    306 {
    307 	struct linux_sys_sigreturn_args /* {
    308 		syscallarg(struct linux_sigcontext *) scp;
    309 	} */ *uap = v;
    310 	struct linux_sigcontext *scp, context;
    311 	struct trapframe *tf;
    312 	sigset_t mask;
    313 	ssize_t ss_gap;
    314 
    315 	/*
    316 	 * The trampoline code hands us the context.
    317 	 * It is unsafe to keep track of it ourselves, in the event that a
    318 	 * program jumps out of a signal handler.
    319 	 */
    320 	scp = SCARG(uap, scp);
    321 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
    322 		return (EFAULT);
    323 
    324 	/* Restore register context. */
    325 	tf = p->p_md.md_regs;
    326 #ifdef VM86
    327 	if (context.sc_eflags & PSL_VM) {
    328 		tf->tf_vm86_gs = context.sc_gs;
    329 		tf->tf_vm86_fs = context.sc_fs;
    330 		tf->tf_vm86_es = context.sc_es;
    331 		tf->tf_vm86_ds = context.sc_ds;
    332 		set_vflags(p, context.sc_eflags);
    333 	} else
    334 #endif
    335 	{
    336 		/*
    337 		 * Check for security violations.  If we're returning to
    338 		 * protected mode, the CPU will validate the segment registers
    339 		 * automatically and generate a trap on violations.  We handle
    340 		 * the trap, rather than doing all of the checking here.
    341 		 */
    342 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    343 		    !USERMODE(context.sc_cs, context.sc_eflags))
    344 			return (EINVAL);
    345 
    346 		tf->tf_gs = context.sc_gs;
    347 		tf->tf_fs = context.sc_fs;
    348 		tf->tf_es = context.sc_es;
    349 		tf->tf_ds = context.sc_ds;
    350 		tf->tf_eflags = context.sc_eflags;
    351 	}
    352 	tf->tf_edi = context.sc_edi;
    353 	tf->tf_esi = context.sc_esi;
    354 	tf->tf_ebp = context.sc_ebp;
    355 	tf->tf_ebx = context.sc_ebx;
    356 	tf->tf_edx = context.sc_edx;
    357 	tf->tf_ecx = context.sc_ecx;
    358 	tf->tf_eax = context.sc_eax;
    359 	tf->tf_eip = context.sc_eip;
    360 	tf->tf_cs = context.sc_cs;
    361 	tf->tf_esp = context.sc_esp_at_signal;
    362 	tf->tf_ss = context.sc_ss;
    363 
    364 	/* Restore signal stack. */
    365 	/*
    366 	 * Linux really does it this way; it doesn't have space in sigframe
    367 	 * to save the onstack flag.
    368 	 */
    369 	ss_gap = (ssize_t)
    370 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
    371 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
    372 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    373 	else
    374 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
    375 
    376 	/* Restore signal mask. */
    377 	linux_old_to_native_sigset(&mask, &context.sc_mask);
    378 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    379 
    380 	return (EJUSTRETURN);
    381 }
    382 
    383 #ifdef USER_LDT
    384 
    385 int
    386 linux_read_ldt(p, uap, retval)
    387 	struct proc *p;
    388 	struct linux_sys_modify_ldt_args /* {
    389 		syscallarg(int) func;
    390 		syscallarg(void *) ptr;
    391 		syscallarg(size_t) bytecount;
    392 	} */ *uap;
    393 	register_t *retval;
    394 {
    395 	struct i386_get_ldt_args gl;
    396 	int error;
    397 	caddr_t sg;
    398 	char *parms;
    399 
    400 	DPRINTF(("linux_read_ldt!"));
    401 	sg = stackgap_init(p, 0);
    402 
    403 	gl.start = 0;
    404 	gl.desc = SCARG(uap, ptr);
    405 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    406 
    407 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    408 
    409 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    410 		return (error);
    411 
    412 	if ((error = i386_get_ldt(p, parms, retval)) != 0)
    413 		return (error);
    414 
    415 	*retval *= sizeof(union descriptor);
    416 	return (0);
    417 }
    418 
    419 struct linux_ldt_info {
    420 	u_int entry_number;
    421 	u_long base_addr;
    422 	u_int limit;
    423 	u_int seg_32bit:1;
    424 	u_int contents:2;
    425 	u_int read_exec_only:1;
    426 	u_int limit_in_pages:1;
    427 	u_int seg_not_present:1;
    428 	u_int useable:1;
    429 };
    430 
    431 int
    432 linux_write_ldt(p, uap, retval)
    433 	struct proc *p;
    434 	struct linux_sys_modify_ldt_args /* {
    435 		syscallarg(int) func;
    436 		syscallarg(void *) ptr;
    437 		syscallarg(size_t) bytecount;
    438 	} */ *uap;
    439 	register_t *retval;
    440 {
    441 	struct linux_ldt_info ldt_info;
    442 	struct segment_descriptor sd;
    443 	struct i386_set_ldt_args sl;
    444 	int error;
    445 	caddr_t sg;
    446 	char *parms;
    447 	int oldmode = (int)retval[0];
    448 
    449 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    450 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    451 		return (EINVAL);
    452 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    453 		return error;
    454 	if (ldt_info.entry_number >= 8192)
    455 		return (EINVAL);
    456 	if (ldt_info.contents == 3) {
    457 		if (oldmode)
    458 			return (EINVAL);
    459 		if (ldt_info.seg_not_present)
    460 			return (EINVAL);
    461 	}
    462 
    463 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    464 	    (oldmode || (ldt_info.contents == 0 &&
    465 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    466 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    467 	    ldt_info.useable == 0))) {
    468 		/* this means you should zero the ldt */
    469 		(void)memset(&sd, 0, sizeof(sd));
    470 	} else {
    471 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    472 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    473 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    474 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    475 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    476 		    (!ldt_info.read_exec_only << 1);
    477 		sd.sd_dpl = SEL_UPL;
    478 		sd.sd_p = !ldt_info.seg_not_present;
    479 		sd.sd_def32 = ldt_info.seg_32bit;
    480 		sd.sd_gran = ldt_info.limit_in_pages;
    481 		if (!oldmode)
    482 			sd.sd_xx = ldt_info.useable;
    483 		else
    484 			sd.sd_xx = 0;
    485 	}
    486 	sg = stackgap_init(p, 0);
    487 	sl.start = ldt_info.entry_number;
    488 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    489 	sl.num = 1;
    490 
    491 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    492 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    493 
    494 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    495 
    496 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    497 		return (error);
    498 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    499 		return (error);
    500 
    501 	if ((error = i386_set_ldt(p, parms, retval)) != 0)
    502 		return (error);
    503 
    504 	*retval = 0;
    505 	return (0);
    506 }
    507 
    508 #endif /* USER_LDT */
    509 
    510 int
    511 linux_sys_modify_ldt(p, v, retval)
    512 	struct proc *p;
    513 	void *v;
    514 	register_t *retval;
    515 {
    516 	struct linux_sys_modify_ldt_args /* {
    517 		syscallarg(int) func;
    518 		syscallarg(void *) ptr;
    519 		syscallarg(size_t) bytecount;
    520 	} */ *uap = v;
    521 
    522 	switch (SCARG(uap, func)) {
    523 #ifdef USER_LDT
    524 	case 0:
    525 		return linux_read_ldt(p, uap, retval);
    526 	case 1:
    527 		retval[0] = 1;
    528 		return linux_write_ldt(p, uap, retval);
    529 	case 2:
    530 #ifdef notyet
    531 		return (linux_read_default_ldt(p, uap, retval);
    532 #else
    533 		return (ENOSYS);
    534 #endif
    535 	case 0x11:
    536 		retval[0] = 0;
    537 		return linux_write_ldt(p, uap, retval);
    538 #endif /* USER_LDT */
    539 
    540 	default:
    541 		return (ENOSYS);
    542 	}
    543 }
    544 
    545 /*
    546  * XXX Pathetic hack to make svgalib work. This will fake the major
    547  * device number of an opened VT so that svgalib likes it. grmbl.
    548  * Should probably do it 'wrong the right way' and use a mapping
    549  * array for all major device numbers, and map linux_mknod too.
    550  */
    551 dev_t
    552 linux_fakedev(dev, raw)
    553 	dev_t dev;
    554 	int raw;
    555 {
    556 	if (raw) {
    557 #if (NWSDISPLAY > 0)
    558 		extern const struct cdevsw wsdisplay_cdevsw;
    559 		if (cdevsw_lookup(dev) == &wsdisplay_cdevsw)
    560 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    561 #endif
    562 	}
    563 
    564 	return dev;
    565 }
    566 
#if (NWSDISPLAY > 0)
/*
 * Fixed keyboard translation tables handed out by the LINUX_KDGKBENT
 * ioctl.  They are not complete, but enough to get an X server running.
 * Each table has NR_KEYS entries and is indexed by the kb_index value
 * of the request.
 */
#define NR_KEYS 128

/* Table 0: no modifier. */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Table 1: shift. */
static const u_short shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Tables 2 and 3: AltGr. */
static const u_short altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Table 4: control. */
static const u_short ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/*
 * Tables selected by the kb_table field of a KDGKBENT request.  Slot 3
 * deliberately repeats the AltGr table.
 */
const u_short * const linux_keytabs[] = {
	plain_map,	/* 0 */
	shift_map,	/* 1 */
	altgr_map,	/* 2 */
	altgr_map,	/* 3 */
	ctrl_map	/* 4 */
};
#endif
    646 
    647 static struct biosdisk_info *
    648 fd2biosinfo(p, fp)
    649 	struct proc *p;
    650 	struct file *fp;
    651 {
    652 	struct vnode *vp;
    653 	const char *blkname;
    654 	char diskname[16];
    655 	int i;
    656 	struct nativedisk_info *nip;
    657 	struct disklist *dl = i386_alldisks;
    658 
    659 	if (fp->f_type != DTYPE_VNODE)
    660 		return NULL;
    661 	vp = (struct vnode *)fp->f_data;
    662 
    663 	if (vp->v_type != VBLK)
    664 		return NULL;
    665 
    666 	blkname = devsw_blk2name(major(vp->v_rdev));
    667 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    668 	    DISKUNIT(vp->v_rdev));
    669 
    670 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    671 		nip = &dl->dl_nativedisks[i];
    672 		if (strcmp(diskname, nip->ni_devname))
    673 			continue;
    674 		if (nip->ni_nmatches != 0)
    675 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    676 	}
    677 
    678 	return NULL;
    679 }
    680 
    681 
    682 /*
    683  * We come here in a last attempt to satisfy a Linux ioctl() call
    684  */
    685 int
    686 linux_machdepioctl(p, v, retval)
    687 	struct proc *p;
    688 	void *v;
    689 	register_t *retval;
    690 {
    691 	struct linux_sys_ioctl_args /* {
    692 		syscallarg(int) fd;
    693 		syscallarg(u_long) com;
    694 		syscallarg(caddr_t) data;
    695 	} */ *uap = v;
    696 	struct sys_ioctl_args bia;
    697 	u_long com;
    698 	int error, error1;
    699 #if (NWSDISPLAY > 0)
    700 	struct vt_mode lvt;
    701 	caddr_t bvtp, sg;
    702 	struct kbentry kbe;
    703 #endif
    704 	struct linux_hd_geometry hdg;
    705 	struct linux_hd_big_geometry hdg_big;
    706 	struct biosdisk_info *bip;
    707 	struct filedesc *fdp;
    708 	struct file *fp;
    709 	int fd;
    710 	struct disklabel label, *labp;
    711 	struct partinfo partp;
    712 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
    713 	u_long start, biostotal, realtotal;
    714 	u_char heads, sectors;
    715 	u_int cylinders;
    716 	struct ioctl_pt pt;
    717 
    718 	fd = SCARG(uap, fd);
    719 	SCARG(&bia, fd) = fd;
    720 	SCARG(&bia, data) = SCARG(uap, data);
    721 	com = SCARG(uap, com);
    722 
    723 	fdp = p->p_fd;
    724 
    725 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    726 		return (EBADF);
    727 
    728 	switch (com) {
    729 #if (NWSDISPLAY > 0)
    730 	case LINUX_KDGKBMODE:
    731 		com = KDGKBMODE;
    732 		break;
    733 	case LINUX_KDSKBMODE:
    734 		com = KDSKBMODE;
    735 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    736 			SCARG(&bia, data) = (caddr_t)K_RAW;
    737 		break;
    738 	case LINUX_KIOCSOUND:
    739 		SCARG(&bia, data) =
    740 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    741 		/* fall through */
    742 	case LINUX_KDMKTONE:
    743 		com = KDMKTONE;
    744 		break;
    745 	case LINUX_KDSETMODE:
    746 		com = KDSETMODE;
    747 		break;
    748 	case LINUX_KDGETMODE:
    749 		/* KD_* values are equal to the wscons numbers */
    750 		com = WSDISPLAYIO_GMODE;
    751 		break;
    752 	case LINUX_KDENABIO:
    753 		com = KDENABIO;
    754 		break;
    755 	case LINUX_KDDISABIO:
    756 		com = KDDISABIO;
    757 		break;
    758 	case LINUX_KDGETLED:
    759 		com = KDGETLED;
    760 		break;
    761 	case LINUX_KDSETLED:
    762 		com = KDSETLED;
    763 		break;
    764 	case LINUX_VT_OPENQRY:
    765 		com = VT_OPENQRY;
    766 		break;
    767 	case LINUX_VT_GETMODE:
    768 		SCARG(&bia, com) = VT_GETMODE;
    769 		if ((error = sys_ioctl(p, &bia, retval)))
    770 			return error;
    771 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    772 		    sizeof (struct vt_mode))))
    773 			return error;
    774 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    775 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    776 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    777 		return copyout((caddr_t)&lvt, SCARG(uap, data),
    778 		    sizeof (struct vt_mode));
    779 	case LINUX_VT_SETMODE:
    780 		com = VT_SETMODE;
    781 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    782 		    sizeof (struct vt_mode))))
    783 			return error;
    784 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    785 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    786 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    787 		sg = stackgap_init(p, 0);
    788 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    789 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    790 			return error;
    791 		SCARG(&bia, data) = bvtp;
    792 		break;
    793 	case LINUX_VT_DISALLOCATE:
    794 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    795 		return 0;
    796 	case LINUX_VT_RELDISP:
    797 		com = VT_RELDISP;
    798 		break;
    799 	case LINUX_VT_ACTIVATE:
    800 		com = VT_ACTIVATE;
    801 		break;
    802 	case LINUX_VT_WAITACTIVE:
    803 		com = VT_WAITACTIVE;
    804 		break;
    805 	case LINUX_VT_GETSTATE:
    806 		com = VT_GETSTATE;
    807 		break;
    808 	case LINUX_KDGKBTYPE:
    809 		/* This is what Linux does. */
    810 		return (subyte(SCARG(uap, data), KB_101));
    811 	case LINUX_KDGKBENT:
    812 		/*
    813 		 * The Linux KDGKBENT ioctl is different from the
    814 		 * SYSV original. So we handle it in machdep code.
    815 		 * XXX We should use keyboard mapping information
    816 		 * from wsdisplay, but this would be expensive.
    817 		 */
    818 		if ((error = copyin(SCARG(uap, data), &kbe,
    819 				    sizeof(struct kbentry))))
    820 			return (error);
    821 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    822 		    || kbe.kb_index >= NR_KEYS)
    823 			return (EINVAL);
    824 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    825 		return (copyout(&kbe, SCARG(uap, data),
    826 				sizeof(struct kbentry)));
    827 #endif
    828 	case LINUX_HDIO_GETGEO:
    829 	case LINUX_HDIO_GETGEO_BIG:
    830 		/*
    831 		 * Try to mimic Linux behaviour: return the BIOS geometry
    832 		 * if possible (extending its # of cylinders if it's beyond
    833 		 * the 1023 limit), fall back to the MI geometry (i.e.
    834 		 * the real geometry) if not found, by returning an
    835 		 * error. See common/linux_hdio.c
    836 		 */
    837 		FILE_USE(fp);
    838 		bip = fd2biosinfo(p, fp);
    839 		ioctlf = fp->f_ops->fo_ioctl;
    840 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
    841 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
    842 		FILE_UNUSE(fp, p);
    843 		if (error != 0 && error1 != 0)
    844 			return error1;
    845 		labp = error != 0 ? &label : partp.disklab;
    846 		start = error1 != 0 ? partp.part->p_offset : 0;
    847 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    848 		    && bip->bi_cyl != 0) {
    849 			heads = bip->bi_head;
    850 			sectors = bip->bi_sec;
    851 			cylinders = bip->bi_cyl;
    852 			biostotal = heads * sectors * cylinders;
    853 			realtotal = labp->d_ntracks * labp->d_nsectors *
    854 			    labp->d_ncylinders;
    855 			if (realtotal > biostotal)
    856 				cylinders = realtotal / (heads * sectors);
    857 		} else {
    858 			heads = labp->d_ntracks;
    859 			cylinders = labp->d_ncylinders;
    860 			sectors = labp->d_nsectors;
    861 		}
    862 		if (com == LINUX_HDIO_GETGEO) {
    863 			hdg.start = start;
    864 			hdg.heads = heads;
    865 			hdg.cylinders = cylinders;
    866 			hdg.sectors = sectors;
    867 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
    868 		} else {
    869 			hdg_big.start = start;
    870 			hdg_big.heads = heads;
    871 			hdg_big.cylinders = cylinders;
    872 			hdg_big.sectors = sectors;
    873 			return copyout(&hdg_big, SCARG(uap, data),
    874 			    sizeof hdg_big);
    875 		}
    876 
    877 	default:
    878 		/*
    879 		 * Unknown to us. If it's on a device, just pass it through
    880 		 * using PTIOCLINUX, the device itself might be able to
    881 		 * make some sense of it.
    882 		 * XXX hack: if the function returns EJUSTRETURN,
    883 		 * it has stuffed a sysctl return value in pt.data.
    884 		 */
    885 		FILE_USE(fp);
    886 		ioctlf = fp->f_ops->fo_ioctl;
    887 		pt.com = SCARG(uap, com);
    888 		pt.data = SCARG(uap, data);
    889 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
    890 		FILE_UNUSE(fp, p);
    891 		if (error == EJUSTRETURN) {
    892 			retval[0] = (register_t)pt.data;
    893 			error = 0;
    894 		}
    895 
    896 		if (error == ENOTTY)
    897 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
    898 			    com));
    899 		return error;
    900 	}
    901 	SCARG(&bia, com) = com;
    902 	return sys_ioctl(p, &bia, retval);
    903 }
    904 
    905 /*
    906  * Set I/O permissions for a process. Just set the maximum level
    907  * right away (ignoring the argument), otherwise we would have
    908  * to rely on I/O permission maps, which are not implemented.
    909  */
    910 int
    911 linux_sys_iopl(p, v, retval)
    912 	struct proc *p;
    913 	void *v;
    914 	register_t *retval;
    915 {
    916 #if 0
    917 	struct linux_sys_iopl_args /* {
    918 		syscallarg(int) level;
    919 	} */ *uap = v;
    920 #endif
    921 	struct trapframe *fp = p->p_md.md_regs;
    922 
    923 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    924 		return EPERM;
    925 	fp->tf_eflags |= PSL_IOPL;
    926 	*retval = 0;
    927 	return 0;
    928 }
    929 
    930 /*
    931  * See above. If a root process tries to set access to an I/O port,
    932  * just let it have the whole range.
    933  */
    934 int
    935 linux_sys_ioperm(p, v, retval)
    936 	struct proc *p;
    937 	void *v;
    938 	register_t *retval;
    939 {
    940 	struct linux_sys_ioperm_args /* {
    941 		syscallarg(unsigned int) lo;
    942 		syscallarg(unsigned int) hi;
    943 		syscallarg(int) val;
    944 	} */ *uap = v;
    945 	struct trapframe *fp = p->p_md.md_regs;
    946 
    947 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    948 		return EPERM;
    949 	if (SCARG(uap, val))
    950 		fp->tf_eflags |= PSL_IOPL;
    951 	*retval = 0;
    952 	return 0;
    953 }
    954