Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.81
      1 /*	$NetBSD: linux_machdep.c,v 1.81 2002/10/09 05:07:55 junyoung Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.81 2002/10/09 05:07:55 junyoung Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/syscallargs.h>
     66 #include <sys/filedesc.h>
     67 #include <sys/exec_elf.h>
     68 #include <sys/disklabel.h>
     69 #include <sys/ioctl.h>
     70 #include <miscfs/specfs/specdev.h>
     71 
     72 #include <compat/linux/common/linux_types.h>
     73 #include <compat/linux/common/linux_signal.h>
     74 #include <compat/linux/common/linux_util.h>
     75 #include <compat/linux/common/linux_ioctl.h>
     76 #include <compat/linux/common/linux_hdio.h>
     77 #include <compat/linux/common/linux_exec.h>
     78 #include <compat/linux/common/linux_machdep.h>
     79 
     80 #include <compat/linux/linux_syscallargs.h>
     81 
     82 #include <machine/cpu.h>
     83 #include <machine/cpufunc.h>
     84 #include <machine/psl.h>
     85 #include <machine/reg.h>
     86 #include <machine/segments.h>
     87 #include <machine/specialreg.h>
     88 #include <machine/sysarch.h>
     89 #include <machine/vm86.h>
     90 #include <machine/vmparam.h>
     91 
     92 /*
     93  * To see whether wscons is configured (for virtual console ioctl calls).
     94  */
     95 #if defined(_KERNEL_OPT)
     96 #include "wsdisplay.h"
     97 #endif
     98 #if (NWSDISPLAY > 0)
     99 #include <dev/wscons/wsconsio.h>
    100 #include <dev/wscons/wsdisplay_usl_io.h>
    101 #if defined(_KERNEL_OPT)
    102 #include "opt_xserver.h"
    103 #endif
    104 #endif
    105 
    106 #ifdef USER_LDT
    107 #include <machine/cpu.h>
    108 int linux_read_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
    109     register_t *));
    110 int linux_write_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
    111     register_t *));
    112 #endif
    113 
    114 #ifdef DEBUG_LINUX
    115 #define DPRINTF(a) uprintf a
    116 #else
    117 #define DPRINTF(a)
    118 #endif
    119 
    120 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    121 extern struct disklist *i386_alldisks;
    122 
    123 /*
    124  * Deal with some i386-specific things in the Linux emulation code.
    125  */
    126 
    127 void
    128 linux_setregs(p, epp, stack)
    129 	struct proc *p;
    130 	struct exec_package *epp;
    131 	u_long stack;
    132 {
    133 	struct pcb *pcb = &p->p_addr->u_pcb;
    134 	struct trapframe *tf;
    135 
    136 #if NNPX > 0
    137 	/* If we were using the FPU, forget about it. */
    138 	if (npxproc == p)
    139 		npxdrop();
    140 #endif
    141 
    142 #ifdef USER_LDT
    143 	pmap_ldt_cleanup(p);
    144 #endif
    145 
    146 	p->p_md.md_flags &= ~MDP_USEDFPU;
    147 
    148 	if (i386_use_fxsave) {
    149 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    150 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    151 	} else
    152 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    153 
    154 	tf = p->p_md.md_regs;
    155 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    156 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    157 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    158 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    159 	tf->tf_edi = 0;
    160 	tf->tf_esi = 0;
    161 	tf->tf_ebp = 0;
    162 	tf->tf_ebx = (int)p->p_psstr;
    163 	tf->tf_edx = 0;
    164 	tf->tf_ecx = 0;
    165 	tf->tf_eax = 0;
    166 	tf->tf_eip = epp->ep_entry;
    167 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    168 	tf->tf_eflags = PSL_USERSET;
    169 	tf->tf_esp = stack;
    170 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    171 }
    172 
    173 /*
    174  * Send an interrupt to process.
    175  *
    176  * Stack is set up to allow sigcode stored
    177  * in u. to call routine, followed by kcall
    178  * to sigreturn routine below.  After sigreturn
    179  * resets the signal mask, the stack, and the
    180  * frame pointer, it returns to the user
    181  * specified pc, psl.
    182  */
    183 
    184 void
    185 linux_sendsig(sig, mask, code)
    186 	int sig;
    187 	sigset_t *mask;
    188 	u_long code;
    189 {
    190 	struct proc *p = curproc;
    191 	struct trapframe *tf;
    192 	struct linux_sigframe *fp, frame;
    193 	int onstack;
    194 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    195 
    196 	tf = p->p_md.md_regs;
    197 
    198 	/* Do we need to jump onto the signal stack? */
    199 	onstack =
    200 	    (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    201 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    202 
    203 	/* Allocate space for the signal handler context. */
    204 	if (onstack)
    205 		fp = (struct linux_sigframe *)((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
    206 					  p->p_sigctx.ps_sigstk.ss_size);
    207 	else
    208 		fp = (struct linux_sigframe *)tf->tf_esp;
    209 	fp--;
    210 
    211 	/* Build stack frame for signal trampoline. */
    212 	frame.sf_handler = catcher;
    213 	frame.sf_sig = native_to_linux_signo[sig];
    214 
    215 	/* Save register context. */
    216 #ifdef VM86
    217 	if (tf->tf_eflags & PSL_VM) {
    218 		frame.sf_sc.sc_gs = tf->tf_vm86_gs;
    219 		frame.sf_sc.sc_fs = tf->tf_vm86_fs;
    220 		frame.sf_sc.sc_es = tf->tf_vm86_es;
    221 		frame.sf_sc.sc_ds = tf->tf_vm86_ds;
    222 		frame.sf_sc.sc_eflags = get_vflags(p);
    223 	} else
    224 #endif
    225 	{
    226 		frame.sf_sc.sc_gs = tf->tf_gs;
    227 		frame.sf_sc.sc_fs = tf->tf_fs;
    228 		frame.sf_sc.sc_es = tf->tf_es;
    229 		frame.sf_sc.sc_ds = tf->tf_ds;
    230 		frame.sf_sc.sc_eflags = tf->tf_eflags;
    231 	}
    232 	frame.sf_sc.sc_edi = tf->tf_edi;
    233 	frame.sf_sc.sc_esi = tf->tf_esi;
    234 	frame.sf_sc.sc_ebp = tf->tf_ebp;
    235 	frame.sf_sc.sc_ebx = tf->tf_ebx;
    236 	frame.sf_sc.sc_edx = tf->tf_edx;
    237 	frame.sf_sc.sc_ecx = tf->tf_ecx;
    238 	frame.sf_sc.sc_eax = tf->tf_eax;
    239 	frame.sf_sc.sc_eip = tf->tf_eip;
    240 	frame.sf_sc.sc_cs = tf->tf_cs;
    241 	frame.sf_sc.sc_esp_at_signal = tf->tf_esp;
    242 	frame.sf_sc.sc_ss = tf->tf_ss;
    243 	frame.sf_sc.sc_err = tf->tf_err;
    244 	frame.sf_sc.sc_trapno = tf->tf_trapno;
    245 	frame.sf_sc.sc_cr2 = p->p_addr->u_pcb.pcb_cr2;
    246 
    247 	/* Save signal stack. */
    248 	/* Linux doesn't save the onstack flag in sigframe */
    249 
    250 	/* Save signal mask. */
    251 	native_to_linux_old_sigset(&frame.sf_sc.sc_mask, mask);
    252 
    253 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    254 		/*
    255 		 * Process has trashed its stack; give it an illegal
    256 		 * instruction to halt it in its tracks.
    257 		 */
    258 		sigexit(p, SIGILL);
    259 		/* NOTREACHED */
    260 	}
    261 
    262 	/*
    263 	 * Build context to run handler in.
    264 	 */
    265 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    266 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    267 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    268 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    269 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    270 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    271 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    272 	tf->tf_esp = (int)fp;
    273 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    274 
    275 	/* Remember that we're now on the signal stack. */
    276 	if (onstack)
    277 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    278 }
    279 
    280 /*
    281  * System call to cleanup state after a signal
    282  * has been taken.  Reset signal mask and
    283  * stack state from context left by sendsig (above).
    284  * Return to previous pc and psl as specified by
    285  * context left by sendsig. Check carefully to
    286  * make sure that the user has not modified the
    287  * psl to gain improper privileges or to cause
    288  * a machine fault.
    289  */
    290 int
    291 linux_sys_rt_sigreturn(p, v, retval)
    292 	struct proc *p;
    293 	void *v;
    294 	register_t *retval;
    295 {
    296 	/* XXX XAX write me */
    297 	return(ENOSYS);
    298 }
    299 
    300 int
    301 linux_sys_sigreturn(p, v, retval)
    302 	struct proc *p;
    303 	void *v;
    304 	register_t *retval;
    305 {
    306 	struct linux_sys_sigreturn_args /* {
    307 		syscallarg(struct linux_sigcontext *) scp;
    308 	} */ *uap = v;
    309 	struct linux_sigcontext *scp, context;
    310 	struct trapframe *tf;
    311 	sigset_t mask;
    312 	ssize_t ss_gap;
    313 
    314 	/*
    315 	 * The trampoline code hands us the context.
    316 	 * It is unsafe to keep track of it ourselves, in the event that a
    317 	 * program jumps out of a signal handler.
    318 	 */
    319 	scp = SCARG(uap, scp);
    320 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
    321 		return (EFAULT);
    322 
    323 	/* Restore register context. */
    324 	tf = p->p_md.md_regs;
    325 #ifdef VM86
    326 	if (context.sc_eflags & PSL_VM) {
    327 		tf->tf_vm86_gs = context.sc_gs;
    328 		tf->tf_vm86_fs = context.sc_fs;
    329 		tf->tf_vm86_es = context.sc_es;
    330 		tf->tf_vm86_ds = context.sc_ds;
    331 		set_vflags(p, context.sc_eflags);
    332 	} else
    333 #endif
    334 	{
    335 		/*
    336 		 * Check for security violations.  If we're returning to
    337 		 * protected mode, the CPU will validate the segment registers
    338 		 * automatically and generate a trap on violations.  We handle
    339 		 * the trap, rather than doing all of the checking here.
    340 		 */
    341 		if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    342 		    !USERMODE(context.sc_cs, context.sc_eflags))
    343 			return (EINVAL);
    344 
    345 		tf->tf_gs = context.sc_gs;
    346 		tf->tf_fs = context.sc_fs;
    347 		tf->tf_es = context.sc_es;
    348 		tf->tf_ds = context.sc_ds;
    349 		tf->tf_eflags = context.sc_eflags;
    350 	}
    351 	tf->tf_edi = context.sc_edi;
    352 	tf->tf_esi = context.sc_esi;
    353 	tf->tf_ebp = context.sc_ebp;
    354 	tf->tf_ebx = context.sc_ebx;
    355 	tf->tf_edx = context.sc_edx;
    356 	tf->tf_ecx = context.sc_ecx;
    357 	tf->tf_eax = context.sc_eax;
    358 	tf->tf_eip = context.sc_eip;
    359 	tf->tf_cs = context.sc_cs;
    360 	tf->tf_esp = context.sc_esp_at_signal;
    361 	tf->tf_ss = context.sc_ss;
    362 
    363 	/* Restore signal stack. */
    364 	/*
    365 	 * Linux really does it this way; it doesn't have space in sigframe
    366 	 * to save the onstack flag.
    367 	 */
    368 	ss_gap = (ssize_t)
    369 	    ((caddr_t) context.sc_esp_at_signal - (caddr_t) p->p_sigctx.ps_sigstk.ss_sp);
    370 	if (ss_gap >= 0  && ss_gap < p->p_sigctx.ps_sigstk.ss_size)
    371 		p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
    372 	else
    373 		p->p_sigctx.ps_sigstk.ss_flags &= ~SS_ONSTACK;
    374 
    375 	/* Restore signal mask. */
    376 	linux_old_to_native_sigset(&mask, &context.sc_mask);
    377 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    378 
    379 	return (EJUSTRETURN);
    380 }
    381 
    382 #ifdef USER_LDT
    383 
    384 int
    385 linux_read_ldt(p, uap, retval)
    386 	struct proc *p;
    387 	struct linux_sys_modify_ldt_args /* {
    388 		syscallarg(int) func;
    389 		syscallarg(void *) ptr;
    390 		syscallarg(size_t) bytecount;
    391 	} */ *uap;
    392 	register_t *retval;
    393 {
    394 	struct i386_get_ldt_args gl;
    395 	int error;
    396 	caddr_t sg;
    397 	char *parms;
    398 
    399 	DPRINTF(("linux_read_ldt!"));
    400 	sg = stackgap_init(p, 0);
    401 
    402 	gl.start = 0;
    403 	gl.desc = SCARG(uap, ptr);
    404 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    405 
    406 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    407 
    408 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    409 		return (error);
    410 
    411 	if ((error = i386_get_ldt(p, parms, retval)) != 0)
    412 		return (error);
    413 
    414 	*retval *= sizeof(union descriptor);
    415 	return (0);
    416 }
    417 
    418 struct linux_ldt_info {
    419 	u_int entry_number;
    420 	u_long base_addr;
    421 	u_int limit;
    422 	u_int seg_32bit:1;
    423 	u_int contents:2;
    424 	u_int read_exec_only:1;
    425 	u_int limit_in_pages:1;
    426 	u_int seg_not_present:1;
    427 	u_int useable:1;
    428 };
    429 
    430 int
    431 linux_write_ldt(p, uap, retval)
    432 	struct proc *p;
    433 	struct linux_sys_modify_ldt_args /* {
    434 		syscallarg(int) func;
    435 		syscallarg(void *) ptr;
    436 		syscallarg(size_t) bytecount;
    437 	} */ *uap;
    438 	register_t *retval;
    439 {
    440 	struct linux_ldt_info ldt_info;
    441 	struct segment_descriptor sd;
    442 	struct i386_set_ldt_args sl;
    443 	int error;
    444 	caddr_t sg;
    445 	char *parms;
    446 	int oldmode = (int)retval[0];
    447 
    448 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    449 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    450 		return (EINVAL);
    451 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    452 		return error;
    453 	if (ldt_info.entry_number >= 8192)
    454 		return (EINVAL);
    455 	if (ldt_info.contents == 3) {
    456 		if (oldmode)
    457 			return (EINVAL);
    458 		if (ldt_info.seg_not_present)
    459 			return (EINVAL);
    460 	}
    461 
    462 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    463 	    (oldmode || (ldt_info.contents == 0 &&
    464 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    465 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    466 	    ldt_info.useable == 0))) {
    467 		/* this means you should zero the ldt */
    468 		(void)memset(&sd, 0, sizeof(sd));
    469 	} else {
    470 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    471 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    472 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    473 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    474 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    475 		    (!ldt_info.read_exec_only << 1);
    476 		sd.sd_dpl = SEL_UPL;
    477 		sd.sd_p = !ldt_info.seg_not_present;
    478 		sd.sd_def32 = ldt_info.seg_32bit;
    479 		sd.sd_gran = ldt_info.limit_in_pages;
    480 		if (!oldmode)
    481 			sd.sd_xx = ldt_info.useable;
    482 		else
    483 			sd.sd_xx = 0;
    484 	}
    485 	sg = stackgap_init(p, 0);
    486 	sl.start = ldt_info.entry_number;
    487 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    488 	sl.num = 1;
    489 
    490 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    491 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    492 
    493 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    494 
    495 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    496 		return (error);
    497 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    498 		return (error);
    499 
    500 	if ((error = i386_set_ldt(p, parms, retval)) != 0)
    501 		return (error);
    502 
    503 	*retval = 0;
    504 	return (0);
    505 }
    506 
    507 #endif /* USER_LDT */
    508 
    509 int
    510 linux_sys_modify_ldt(p, v, retval)
    511 	struct proc *p;
    512 	void *v;
    513 	register_t *retval;
    514 {
    515 	struct linux_sys_modify_ldt_args /* {
    516 		syscallarg(int) func;
    517 		syscallarg(void *) ptr;
    518 		syscallarg(size_t) bytecount;
    519 	} */ *uap = v;
    520 
    521 	switch (SCARG(uap, func)) {
    522 #ifdef USER_LDT
    523 	case 0:
    524 		return linux_read_ldt(p, uap, retval);
    525 	case 1:
    526 		retval[0] = 1;
    527 		return linux_write_ldt(p, uap, retval);
    528 	case 2:
    529 #ifdef notyet
    530 		return (linux_read_default_ldt(p, uap, retval);
    531 #else
    532 		return (ENOSYS);
    533 #endif
    534 	case 0x11:
    535 		retval[0] = 0;
    536 		return linux_write_ldt(p, uap, retval);
    537 #endif /* USER_LDT */
    538 
    539 	default:
    540 		return (ENOSYS);
    541 	}
    542 }
    543 
/*
 * XXX Pathetic hack to make svgalib work: fake the major device number
 * of an opened VT so that svgalib likes it.  Should probably do it
 * 'wrong the right way' and use a mapping array for all major device
 * numbers, and map linux_mknod too.
 *
 * When `raw' is non-zero and `dev' belongs to the wsdisplay character
 * device, the result is LINUX_CONS_MAJOR with the minor bumped by one.
 * In every other case (including kernels without wsdisplay) `dev' is
 * returned unchanged.
 */
dev_t
linux_fakedev(dev, raw)
	dev_t dev;
	int raw;
{
#if (NWSDISPLAY > 0)
	extern const struct cdevsw wsdisplay_cdevsw;

	if (raw && cdevsw_lookup(dev) == &wsdisplay_cdevsw)
		return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
#endif

	return dev;
}
    565 
#if (NWSDISPLAY > 0)
/*
 * Fixed key tables returned by the LINUX_KDGKBENT ioctl, one u_short
 * value per key index.  That's not complete, but enough to get an X
 * server running.
 */
#define NR_KEYS 128

/* Table 0 of linux_keytabs. */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Table 1 of linux_keytabs. */
static const u_short shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Tables 2 and 3 of linux_keytabs (the same array is listed twice). */
static const u_short altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Table 4 of linux_keytabs. */
static const u_short ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Indexed by kb_table in the LINUX_KDGKBENT handler. */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
#endif
    645 
    646 static struct biosdisk_info *
    647 fd2biosinfo(p, fp)
    648 	struct proc *p;
    649 	struct file *fp;
    650 {
    651 	struct vnode *vp;
    652 	const char *blkname;
    653 	char diskname[16];
    654 	int i;
    655 	struct nativedisk_info *nip;
    656 	struct disklist *dl = i386_alldisks;
    657 
    658 	if (fp->f_type != DTYPE_VNODE)
    659 		return NULL;
    660 	vp = (struct vnode *)fp->f_data;
    661 
    662 	if (vp->v_type != VBLK)
    663 		return NULL;
    664 
    665 	blkname = devsw_blk2name(major(vp->v_rdev));
    666 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    667 	    DISKUNIT(vp->v_rdev));
    668 
    669 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    670 		nip = &dl->dl_nativedisks[i];
    671 		if (strcmp(diskname, nip->ni_devname))
    672 			continue;
    673 		if (nip->ni_nmatches != 0)
    674 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    675 	}
    676 
    677 	return NULL;
    678 }
    679 
    680 
    681 /*
    682  * We come here in a last attempt to satisfy a Linux ioctl() call
    683  */
    684 int
    685 linux_machdepioctl(p, v, retval)
    686 	struct proc *p;
    687 	void *v;
    688 	register_t *retval;
    689 {
    690 	struct linux_sys_ioctl_args /* {
    691 		syscallarg(int) fd;
    692 		syscallarg(u_long) com;
    693 		syscallarg(caddr_t) data;
    694 	} */ *uap = v;
    695 	struct sys_ioctl_args bia;
    696 	u_long com;
    697 	int error, error1;
    698 #if (NWSDISPLAY > 0)
    699 	struct vt_mode lvt;
    700 	caddr_t bvtp, sg;
    701 	struct kbentry kbe;
    702 #endif
    703 	struct linux_hd_geometry hdg;
    704 	struct linux_hd_big_geometry hdg_big;
    705 	struct biosdisk_info *bip;
    706 	struct filedesc *fdp;
    707 	struct file *fp;
    708 	int fd;
    709 	struct disklabel label, *labp;
    710 	struct partinfo partp;
    711 	int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
    712 	u_long start, biostotal, realtotal;
    713 	u_char heads, sectors;
    714 	u_int cylinders;
    715 	struct ioctl_pt pt;
    716 
    717 	fd = SCARG(uap, fd);
    718 	SCARG(&bia, fd) = fd;
    719 	SCARG(&bia, data) = SCARG(uap, data);
    720 	com = SCARG(uap, com);
    721 
    722 	fdp = p->p_fd;
    723 
    724 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    725 		return (EBADF);
    726 
    727 	switch (com) {
    728 #if (NWSDISPLAY > 0)
    729 	case LINUX_KDGKBMODE:
    730 		com = KDGKBMODE;
    731 		break;
    732 	case LINUX_KDSKBMODE:
    733 		com = KDSKBMODE;
    734 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    735 			SCARG(&bia, data) = (caddr_t)K_RAW;
    736 		break;
    737 	case LINUX_KIOCSOUND:
    738 		SCARG(&bia, data) =
    739 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    740 		/* fall through */
    741 	case LINUX_KDMKTONE:
    742 		com = KDMKTONE;
    743 		break;
    744 	case LINUX_KDSETMODE:
    745 		com = KDSETMODE;
    746 		break;
    747 	case LINUX_KDGETMODE:
    748 		/* KD_* values are equal to the wscons numbers */
    749 		com = WSDISPLAYIO_GMODE;
    750 		break;
    751 	case LINUX_KDENABIO:
    752 		com = KDENABIO;
    753 		break;
    754 	case LINUX_KDDISABIO:
    755 		com = KDDISABIO;
    756 		break;
    757 	case LINUX_KDGETLED:
    758 		com = KDGETLED;
    759 		break;
    760 	case LINUX_KDSETLED:
    761 		com = KDSETLED;
    762 		break;
    763 	case LINUX_VT_OPENQRY:
    764 		com = VT_OPENQRY;
    765 		break;
    766 	case LINUX_VT_GETMODE:
    767 		SCARG(&bia, com) = VT_GETMODE;
    768 		if ((error = sys_ioctl(p, &bia, retval)))
    769 			return error;
    770 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    771 		    sizeof (struct vt_mode))))
    772 			return error;
    773 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    774 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    775 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    776 		return copyout((caddr_t)&lvt, SCARG(uap, data),
    777 		    sizeof (struct vt_mode));
    778 	case LINUX_VT_SETMODE:
    779 		com = VT_SETMODE;
    780 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    781 		    sizeof (struct vt_mode))))
    782 			return error;
    783 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    784 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    785 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    786 		sg = stackgap_init(p, 0);
    787 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    788 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    789 			return error;
    790 		SCARG(&bia, data) = bvtp;
    791 		break;
    792 	case LINUX_VT_DISALLOCATE:
    793 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    794 		return 0;
    795 	case LINUX_VT_RELDISP:
    796 		com = VT_RELDISP;
    797 		break;
    798 	case LINUX_VT_ACTIVATE:
    799 		com = VT_ACTIVATE;
    800 		break;
    801 	case LINUX_VT_WAITACTIVE:
    802 		com = VT_WAITACTIVE;
    803 		break;
    804 	case LINUX_VT_GETSTATE:
    805 		com = VT_GETSTATE;
    806 		break;
    807 	case LINUX_KDGKBTYPE:
    808 		/* This is what Linux does. */
    809 		return (subyte(SCARG(uap, data), KB_101));
    810 	case LINUX_KDGKBENT:
    811 		/*
    812 		 * The Linux KDGKBENT ioctl is different from the
    813 		 * SYSV original. So we handle it in machdep code.
    814 		 * XXX We should use keyboard mapping information
    815 		 * from wsdisplay, but this would be expensive.
    816 		 */
    817 		if ((error = copyin(SCARG(uap, data), &kbe,
    818 				    sizeof(struct kbentry))))
    819 			return (error);
    820 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    821 		    || kbe.kb_index >= NR_KEYS)
    822 			return (EINVAL);
    823 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    824 		return (copyout(&kbe, SCARG(uap, data),
    825 				sizeof(struct kbentry)));
    826 #endif
    827 	case LINUX_HDIO_GETGEO:
    828 	case LINUX_HDIO_GETGEO_BIG:
    829 		/*
    830 		 * Try to mimic Linux behaviour: return the BIOS geometry
    831 		 * if possible (extending its # of cylinders if it's beyond
    832 		 * the 1023 limit), fall back to the MI geometry (i.e.
    833 		 * the real geometry) if not found, by returning an
    834 		 * error. See common/linux_hdio.c
    835 		 */
    836 		FILE_USE(fp);
    837 		bip = fd2biosinfo(p, fp);
    838 		ioctlf = fp->f_ops->fo_ioctl;
    839 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
    840 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
    841 		FILE_UNUSE(fp, p);
    842 		if (error != 0 && error1 != 0)
    843 			return error1;
    844 		labp = error != 0 ? &label : partp.disklab;
    845 		start = error1 != 0 ? partp.part->p_offset : 0;
    846 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    847 		    && bip->bi_cyl != 0) {
    848 			heads = bip->bi_head;
    849 			sectors = bip->bi_sec;
    850 			cylinders = bip->bi_cyl;
    851 			biostotal = heads * sectors * cylinders;
    852 			realtotal = labp->d_ntracks * labp->d_nsectors *
    853 			    labp->d_ncylinders;
    854 			if (realtotal > biostotal)
    855 				cylinders = realtotal / (heads * sectors);
    856 		} else {
    857 			heads = labp->d_ntracks;
    858 			cylinders = labp->d_ncylinders;
    859 			sectors = labp->d_nsectors;
    860 		}
    861 		if (com == LINUX_HDIO_GETGEO) {
    862 			hdg.start = start;
    863 			hdg.heads = heads;
    864 			hdg.cylinders = cylinders;
    865 			hdg.sectors = sectors;
    866 			return copyout(&hdg, SCARG(uap, data), sizeof hdg);
    867 		} else {
    868 			hdg_big.start = start;
    869 			hdg_big.heads = heads;
    870 			hdg_big.cylinders = cylinders;
    871 			hdg_big.sectors = sectors;
    872 			return copyout(&hdg_big, SCARG(uap, data),
    873 			    sizeof hdg_big);
    874 		}
    875 
    876 	default:
    877 		/*
    878 		 * Unknown to us. If it's on a device, just pass it through
    879 		 * using PTIOCLINUX, the device itself might be able to
    880 		 * make some sense of it.
    881 		 * XXX hack: if the function returns EJUSTRETURN,
    882 		 * it has stuffed a sysctl return value in pt.data.
    883 		 */
    884 		FILE_USE(fp);
    885 		ioctlf = fp->f_ops->fo_ioctl;
    886 		pt.com = SCARG(uap, com);
    887 		pt.data = SCARG(uap, data);
    888 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
    889 		FILE_UNUSE(fp, p);
    890 		if (error == EJUSTRETURN) {
    891 			retval[0] = (register_t)pt.data;
    892 			error = 0;
    893 		}
    894 
    895 		if (error == ENOTTY)
    896 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
    897 			    com));
    898 		return error;
    899 	}
    900 	SCARG(&bia, com) = com;
    901 	return sys_ioctl(p, &bia, retval);
    902 }
    903 
    904 /*
    905  * Set I/O permissions for a process. Just set the maximum level
    906  * right away (ignoring the argument), otherwise we would have
    907  * to rely on I/O permission maps, which are not implemented.
    908  */
    909 int
    910 linux_sys_iopl(p, v, retval)
    911 	struct proc *p;
    912 	void *v;
    913 	register_t *retval;
    914 {
    915 #if 0
    916 	struct linux_sys_iopl_args /* {
    917 		syscallarg(int) level;
    918 	} */ *uap = v;
    919 #endif
    920 	struct trapframe *fp = p->p_md.md_regs;
    921 
    922 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    923 		return EPERM;
    924 	fp->tf_eflags |= PSL_IOPL;
    925 	*retval = 0;
    926 	return 0;
    927 }
    928 
    929 /*
    930  * See above. If a root process tries to set access to an I/O port,
    931  * just let it have the whole range.
    932  */
    933 int
    934 linux_sys_ioperm(p, v, retval)
    935 	struct proc *p;
    936 	void *v;
    937 	register_t *retval;
    938 {
    939 	struct linux_sys_ioperm_args /* {
    940 		syscallarg(unsigned int) lo;
    941 		syscallarg(unsigned int) hi;
    942 		syscallarg(int) val;
    943 	} */ *uap = v;
    944 	struct trapframe *fp = p->p_md.md_regs;
    945 
    946 	if (suser(p->p_ucred, &p->p_acflag) != 0)
    947 		return EPERM;
    948 	if (SCARG(uap, val))
    949 		fp->tf_eflags |= PSL_IOPL;
    950 	*retval = 0;
    951 	return 0;
    952 }
    953