linux_machdep.c revision 1.83 1 /* $NetBSD: linux_machdep.c,v 1.83 2002/12/06 00:02:59 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Frank van der Linden.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.83 2002/12/06 00:02:59 christos Exp $");
41
42 #if defined(_KERNEL_OPT)
43 #include "opt_vm86.h"
44 #include "opt_user_ldt.h"
45 #endif
46
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/signalvar.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/user.h>
53 #include <sys/buf.h>
54 #include <sys/reboot.h>
55 #include <sys/conf.h>
56 #include <sys/exec.h>
57 #include <sys/file.h>
58 #include <sys/callout.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/msgbuf.h>
62 #include <sys/mount.h>
63 #include <sys/vnode.h>
64 #include <sys/device.h>
65 #include <sys/syscallargs.h>
66 #include <sys/filedesc.h>
67 #include <sys/exec_elf.h>
68 #include <sys/disklabel.h>
69 #include <sys/ioctl.h>
70 #include <miscfs/specfs/specdev.h>
71
72 #include <compat/linux/common/linux_types.h>
73 #include <compat/linux/common/linux_signal.h>
74 #include <compat/linux/common/linux_util.h>
75 #include <compat/linux/common/linux_ioctl.h>
76 #include <compat/linux/common/linux_hdio.h>
77 #include <compat/linux/common/linux_exec.h>
78 #include <compat/linux/common/linux_machdep.h>
79
80 #include <compat/linux/linux_syscallargs.h>
81
82 #include <machine/cpu.h>
83 #include <machine/cpufunc.h>
84 #include <machine/psl.h>
85 #include <machine/reg.h>
86 #include <machine/segments.h>
87 #include <machine/specialreg.h>
88 #include <machine/sysarch.h>
89 #include <machine/vm86.h>
90 #include <machine/vmparam.h>
91
92 /*
93 * To see whether wscons is configured (for virtual console ioctl calls).
94 */
95 #if defined(_KERNEL_OPT)
96 #include "wsdisplay.h"
97 #endif
98 #if (NWSDISPLAY > 0)
99 #include <dev/wscons/wsconsio.h>
100 #include <dev/wscons/wsdisplay_usl_io.h>
101 #if defined(_KERNEL_OPT)
102 #include "opt_xserver.h"
103 #endif
104 #endif
105
106 #ifdef USER_LDT
107 #include <machine/cpu.h>
108 int linux_read_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
109 register_t *));
110 int linux_write_ldt __P((struct proc *, struct linux_sys_modify_ldt_args *,
111 register_t *));
112 #endif
113
114 #ifdef DEBUG_LINUX
115 #define DPRINTF(a) uprintf a
116 #else
117 #define DPRINTF(a)
118 #endif
119
120 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
121 extern struct disklist *i386_alldisks;
122 static void linux_savecontext __P((struct proc *, struct trapframe *,
123 sigset_t *, struct linux_sigcontext *));
124 static void linux_rt_sendsig __P((int, sigset_t *, u_long));
125 static void linux_old_sendsig __P((int, sigset_t *, u_long));
126
127 extern char linux_sigcode[], linux_rt_sigcode[];
128 /*
129 * Deal with some i386-specific things in the Linux emulation code.
130 */
131
132 void
133 linux_setregs(p, epp, stack)
134 struct proc *p;
135 struct exec_package *epp;
136 u_long stack;
137 {
138 struct pcb *pcb = &p->p_addr->u_pcb;
139 struct trapframe *tf;
140
141 #if NNPX > 0
142 /* If we were using the FPU, forget about it. */
143 if (npxproc == p)
144 npxdrop();
145 #endif
146
147 #ifdef USER_LDT
148 pmap_ldt_cleanup(p);
149 #endif
150
151 p->p_md.md_flags &= ~MDP_USEDFPU;
152
153 if (i386_use_fxsave) {
154 pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
155 pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
156 } else
157 pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
158
159 tf = p->p_md.md_regs;
160 tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
161 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
162 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
163 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
164 tf->tf_edi = 0;
165 tf->tf_esi = 0;
166 tf->tf_ebp = 0;
167 tf->tf_ebx = (int)p->p_psstr;
168 tf->tf_edx = 0;
169 tf->tf_ecx = 0;
170 tf->tf_eax = 0;
171 tf->tf_eip = epp->ep_entry;
172 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
173 tf->tf_eflags = PSL_USERSET;
174 tf->tf_esp = stack;
175 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
176 }
177
178 /*
179 * Send an interrupt to process.
180 *
181 * Stack is set up to allow sigcode stored
182 * in u. to call routine, followed by kcall
183 * to sigreturn routine below. After sigreturn
184 * resets the signal mask, the stack, and the
185 * frame pointer, it returns to the user
186 * specified pc, psl.
187 */
188
189 void
190 linux_sendsig(sig, mask, code)
191 int sig;
192 sigset_t *mask;
193 u_long code;
194 {
195 if (SIGACTION(curproc, sig).sa_flags & SA_SIGINFO)
196 linux_rt_sendsig(sig, mask, code);
197 else
198 linux_old_sendsig(sig, mask, code);
199 }
200
201
202 static void
203 linux_savecontext(p, tf, mask, sc)
204 struct proc *p;
205 struct trapframe *tf;
206 sigset_t *mask;
207 struct linux_sigcontext *sc;
208 {
209 /* Save register context. */
210 #ifdef VM86
211 if (tf->tf_eflags & PSL_VM) {
212 sc->sc_gs = tf->tf_vm86_gs;
213 sc->sc_fs = tf->tf_vm86_fs;
214 sc->sc_es = tf->tf_vm86_es;
215 sc->sc_ds = tf->tf_vm86_ds;
216 sc->sc_eflags = get_vflags(p);
217 } else
218 #endif
219 {
220 sc->sc_gs = tf->tf_gs;
221 sc->sc_fs = tf->tf_fs;
222 sc->sc_es = tf->tf_es;
223 sc->sc_ds = tf->tf_ds;
224 sc->sc_eflags = tf->tf_eflags;
225 }
226 sc->sc_edi = tf->tf_edi;
227 sc->sc_esi = tf->tf_esi;
228 sc->sc_esp = tf->tf_esp;
229 sc->sc_ebp = tf->tf_ebp;
230 sc->sc_ebx = tf->tf_ebx;
231 sc->sc_edx = tf->tf_edx;
232 sc->sc_ecx = tf->tf_ecx;
233 sc->sc_eax = tf->tf_eax;
234 sc->sc_eip = tf->tf_eip;
235 sc->sc_cs = tf->tf_cs;
236 sc->sc_esp_at_signal = tf->tf_esp;
237 sc->sc_ss = tf->tf_ss;
238 sc->sc_err = tf->tf_err;
239 sc->sc_trapno = tf->tf_trapno;
240 sc->sc_cr2 = p->p_addr->u_pcb.pcb_cr2;
241 sc->sc_387 = NULL;
242
243 /* Save signal stack. */
244 /* Linux doesn't save the onstack flag in sigframe */
245
246 /* Save signal mask. */
247 native_to_linux_old_sigset(&sc->sc_mask, mask);
248 }
249
250 static void
251 linux_rt_sendsig(sig, mask, code)
252 int sig;
253 sigset_t *mask;
254 u_long code;
255 {
256 struct proc *p = curproc;
257 struct trapframe *tf;
258 struct linux_rt_sigframe *fp, frame;
259 int onstack;
260 sig_t catcher = SIGACTION(p, sig).sa_handler;
261 struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
262
263 tf = p->p_md.md_regs;
264
265 /* Do we need to jump onto the signal stack? */
266 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
267 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
268
269
270 /* Allocate space for the signal handler context. */
271 if (onstack)
272 fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
273 sas->ss_size);
274 else
275 fp = (struct linux_rt_sigframe *)tf->tf_esp;
276 fp--;
277
278 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x\n", onstack, fp,
279 sig, tf->tf_eip));
280
281 /* Build stack frame for signal trampoline. */
282 frame.sf_handler = catcher;
283 frame.sf_sig = native_to_linux_signo[sig];
284 frame.sf_sip = &fp->sf_si;
285 frame.sf_scp = &fp->sf_sc;
286
287 /*
288 * XXX: zero siginfo out until we provide more info.
289 */
290 (void)memset(&frame.sf_si, 0, sizeof(frame.sf_si));
291
292 /* Save register context. */
293 linux_savecontext(p, tf, mask, &frame.sf_sc);
294
295 if (copyout(&frame, fp, sizeof(frame)) != 0) {
296 /*
297 * Process has trashed its stack; give it an illegal
298 * instruction to halt it in its tracks.
299 */
300 sigexit(p, SIGILL);
301 /* NOTREACHED */
302 }
303
304 /*
305 * Build context to run handler in.
306 */
307 tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
308 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
309 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
310 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
311 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
312 (linux_rt_sigcode - linux_sigcode);
313 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
314 tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
315 tf->tf_esp = (int)fp;
316 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
317
318 /* Remember that we're now on the signal stack. */
319 if (onstack)
320 sas->ss_flags |= SS_ONSTACK;
321 }
322
323 static void
324 linux_old_sendsig(sig, mask, code)
325 int sig;
326 sigset_t *mask;
327 u_long code;
328 {
329 struct proc *p = curproc;
330 struct trapframe *tf;
331 struct linux_sigframe *fp, frame;
332 int onstack;
333 sig_t catcher = SIGACTION(p, sig).sa_handler;
334 struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
335
336 tf = p->p_md.md_regs;
337
338 /* Do we need to jump onto the signal stack? */
339 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
340 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
341
342 /* Allocate space for the signal handler context. */
343 if (onstack)
344 fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
345 sas->ss_size);
346 else
347 fp = (struct linux_sigframe *)tf->tf_esp;
348 fp--;
349
350 DPRINTF((uprintf("old: onstack = %d, fp = %p sig = %d eip = 0x%x\n",
351 onstack, fp, sig, tf->tf_eip));
352
353 /* Build stack frame for signal trampoline. */
354 frame.sf_handler = catcher;
355 frame.sf_sig = native_to_linux_signo[sig];
356
357 linux_savecontext(p, tf, mask, &frame.sf_sc);
358
359 if (copyout(&frame, fp, sizeof(frame)) != 0) {
360 /*
361 * Process has trashed its stack; give it an illegal
362 * instruction to halt it in its tracks.
363 */
364 sigexit(p, SIGILL);
365 /* NOTREACHED */
366 }
367
368 /*
369 * Build context to run handler in.
370 */
371 tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
372 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
373 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
374 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
375 tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
376 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
377 tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
378 tf->tf_esp = (int)fp;
379 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
380
381 /* Remember that we're now on the signal stack. */
382 if (onstack)
383 sas->ss_flags |= SS_ONSTACK;
384 }
385
386 /*
387 * System call to cleanup state after a signal
388 * has been taken. Reset signal mask and
389 * stack state from context left by sendsig (above).
390 * Return to previous pc and psl as specified by
391 * context left by sendsig. Check carefully to
392 * make sure that the user has not modified the
393 * psl to gain improper privileges or to cause
394 * a machine fault.
395 */
396 int
397 linux_sys_rt_sigreturn(p, v, retval)
398 struct proc *p;
399 void *v;
400 register_t *retval;
401 {
402 /* XXX XAX write me */
403 return(ENOSYS);
404 }
405
406 int
407 linux_sys_sigreturn(p, v, retval)
408 struct proc *p;
409 void *v;
410 register_t *retval;
411 {
412 struct linux_sys_sigreturn_args /* {
413 syscallarg(struct linux_sigcontext *) scp;
414 } */ *uap = v;
415 struct linux_sigcontext *scp, context;
416 struct trapframe *tf;
417 sigset_t mask;
418 ssize_t ss_gap;
419 struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
420
421 /*
422 * The trampoline code hands us the context.
423 * It is unsafe to keep track of it ourselves, in the event that a
424 * program jumps out of a signal handler.
425 */
426 scp = SCARG(uap, scp);
427 if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
428 return EFAULT;
429
430 /* Restore register context. */
431 tf = p->p_md.md_regs;
432
433 DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
434 #ifdef VM86
435 if (context.sc_eflags & PSL_VM) {
436 void syscall_vm86 __P((struct trapframe));
437
438 tf->tf_vm86_gs = context.sc_gs;
439 tf->tf_vm86_fs = context.sc_fs;
440 tf->tf_vm86_es = context.sc_es;
441 tf->tf_vm86_ds = context.sc_ds;
442 set_vflags(p, context.sc_eflags);
443 p->p_md.md_syscall = syscall_vm86;
444 } else
445 #endif
446 {
447 /*
448 * Check for security violations. If we're returning to
449 * protected mode, the CPU will validate the segment registers
450 * automatically and generate a trap on violations. We handle
451 * the trap, rather than doing all of the checking here.
452 */
453 if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
454 !USERMODE(context.sc_cs, context.sc_eflags))
455 return EINVAL;
456
457 tf->tf_gs = context.sc_gs;
458 tf->tf_fs = context.sc_fs;
459 tf->tf_es = context.sc_es;
460 tf->tf_ds = context.sc_ds;
461 #ifdef VM86
462 if (tf->tf_eflags & PSL_VM)
463 (*p->p_emul->e_syscall_intern)(p);
464 #endif
465 tf->tf_eflags = context.sc_eflags;
466 }
467 tf->tf_edi = context.sc_edi;
468 tf->tf_esi = context.sc_esi;
469 tf->tf_ebp = context.sc_ebp;
470 tf->tf_ebx = context.sc_ebx;
471 tf->tf_edx = context.sc_edx;
472 tf->tf_ecx = context.sc_ecx;
473 tf->tf_eax = context.sc_eax;
474 tf->tf_eip = context.sc_eip;
475 tf->tf_cs = context.sc_cs;
476 tf->tf_esp = context.sc_esp_at_signal;
477 tf->tf_ss = context.sc_ss;
478
479 /* Restore signal stack. */
480 /*
481 * Linux really does it this way; it doesn't have space in sigframe
482 * to save the onstack flag.
483 */
484 ss_gap = (ssize_t)
485 ((caddr_t) context.sc_esp_at_signal - (caddr_t) sas->ss_sp);
486 if (ss_gap >= 0 && ss_gap < sas->ss_size)
487 sas->ss_flags |= SS_ONSTACK;
488 else
489 sas->ss_flags &= ~SS_ONSTACK;
490
491 /* Restore signal mask. */
492 linux_old_to_native_sigset(&mask, &context.sc_mask);
493 (void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
494 DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
495 return EJUSTRETURN;
496 }
497
498 #ifdef USER_LDT
499
500 int
501 linux_read_ldt(p, uap, retval)
502 struct proc *p;
503 struct linux_sys_modify_ldt_args /* {
504 syscallarg(int) func;
505 syscallarg(void *) ptr;
506 syscallarg(size_t) bytecount;
507 } */ *uap;
508 register_t *retval;
509 {
510 struct i386_get_ldt_args gl;
511 int error;
512 caddr_t sg;
513 char *parms;
514
515 DPRINTF(("linux_read_ldt!"));
516 sg = stackgap_init(p, 0);
517
518 gl.start = 0;
519 gl.desc = SCARG(uap, ptr);
520 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
521
522 parms = stackgap_alloc(p, &sg, sizeof(gl));
523
524 if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
525 return (error);
526
527 if ((error = i386_get_ldt(p, parms, retval)) != 0)
528 return (error);
529
530 *retval *= sizeof(union descriptor);
531 return (0);
532 }
533
534 struct linux_ldt_info {
535 u_int entry_number;
536 u_long base_addr;
537 u_int limit;
538 u_int seg_32bit:1;
539 u_int contents:2;
540 u_int read_exec_only:1;
541 u_int limit_in_pages:1;
542 u_int seg_not_present:1;
543 u_int useable:1;
544 };
545
546 int
547 linux_write_ldt(p, uap, retval)
548 struct proc *p;
549 struct linux_sys_modify_ldt_args /* {
550 syscallarg(int) func;
551 syscallarg(void *) ptr;
552 syscallarg(size_t) bytecount;
553 } */ *uap;
554 register_t *retval;
555 {
556 struct linux_ldt_info ldt_info;
557 struct segment_descriptor sd;
558 struct i386_set_ldt_args sl;
559 int error;
560 caddr_t sg;
561 char *parms;
562 int oldmode = (int)retval[0];
563
564 DPRINTF(("linux_write_ldt %d\n", oldmode));
565 if (SCARG(uap, bytecount) != sizeof(ldt_info))
566 return (EINVAL);
567 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
568 return error;
569 if (ldt_info.entry_number >= 8192)
570 return (EINVAL);
571 if (ldt_info.contents == 3) {
572 if (oldmode)
573 return (EINVAL);
574 if (ldt_info.seg_not_present)
575 return (EINVAL);
576 }
577
578 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
579 (oldmode || (ldt_info.contents == 0 &&
580 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
581 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
582 ldt_info.useable == 0))) {
583 /* this means you should zero the ldt */
584 (void)memset(&sd, 0, sizeof(sd));
585 } else {
586 sd.sd_lobase = ldt_info.base_addr & 0xffffff;
587 sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
588 sd.sd_lolimit = ldt_info.limit & 0xffff;
589 sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
590 sd.sd_type = 16 | (ldt_info.contents << 2) |
591 (!ldt_info.read_exec_only << 1);
592 sd.sd_dpl = SEL_UPL;
593 sd.sd_p = !ldt_info.seg_not_present;
594 sd.sd_def32 = ldt_info.seg_32bit;
595 sd.sd_gran = ldt_info.limit_in_pages;
596 if (!oldmode)
597 sd.sd_xx = ldt_info.useable;
598 else
599 sd.sd_xx = 0;
600 }
601 sg = stackgap_init(p, 0);
602 sl.start = ldt_info.entry_number;
603 sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
604 sl.num = 1;
605
606 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
607 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
608
609 parms = stackgap_alloc(p, &sg, sizeof(sl));
610
611 if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
612 return (error);
613 if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
614 return (error);
615
616 if ((error = i386_set_ldt(p, parms, retval)) != 0)
617 return (error);
618
619 *retval = 0;
620 return (0);
621 }
622
623 #endif /* USER_LDT */
624
625 int
626 linux_sys_modify_ldt(p, v, retval)
627 struct proc *p;
628 void *v;
629 register_t *retval;
630 {
631 struct linux_sys_modify_ldt_args /* {
632 syscallarg(int) func;
633 syscallarg(void *) ptr;
634 syscallarg(size_t) bytecount;
635 } */ *uap = v;
636
637 switch (SCARG(uap, func)) {
638 #ifdef USER_LDT
639 case 0:
640 return linux_read_ldt(p, uap, retval);
641 case 1:
642 retval[0] = 1;
643 return linux_write_ldt(p, uap, retval);
644 case 2:
645 #ifdef notyet
646 return (linux_read_default_ldt(p, uap, retval);
647 #else
648 return (ENOSYS);
649 #endif
650 case 0x11:
651 retval[0] = 0;
652 return linux_write_ldt(p, uap, retval);
653 #endif /* USER_LDT */
654
655 default:
656 return (ENOSYS);
657 }
658 }
659
660 /*
661 * XXX Pathetic hack to make svgalib work. This will fake the major
662 * device number of an opened VT so that svgalib likes it. grmbl.
663 * Should probably do it 'wrong the right way' and use a mapping
664 * array for all major device numbers, and map linux_mknod too.
665 */
666 dev_t
667 linux_fakedev(dev, raw)
668 dev_t dev;
669 int raw;
670 {
671 if (raw) {
672 #if (NWSDISPLAY > 0)
673 extern const struct cdevsw wsdisplay_cdevsw;
674 if (cdevsw_lookup(dev) == &wsdisplay_cdevsw)
675 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
676 #endif
677 }
678
679 return dev;
680 }
681
#if (NWSDISPLAY > 0)
/*
 * Static key maps handed out by the KDGKBENT ioctl; each one holds
 * NR_KEYS entries and is indexed by kb_index.
 * That's not complete, but enough to get an X server running.
 */
#define NR_KEYS 128

/* Table 0: no modifier. */
static const u_short plain_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036,
	/* 0x08 */ 0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009,
	/* 0x10 */ 0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
	/* 0x18 */ 0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73,
	/* 0x20 */ 0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b,
	/* 0x28 */ 0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76,
	/* 0x30 */ 0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
	/* 0x40 */ 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307,
	/* 0x48 */ 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	/* 0x50 */ 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a,
	/* 0x58 */ 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/* Table 1: shift. */
static const u_short shift_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e,
	/* 0x08 */ 0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009,
	/* 0x10 */ 0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49,
	/* 0x18 */ 0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53,
	/* 0x20 */ 0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a,
	/* 0x28 */ 0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56,
	/* 0x30 */ 0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e,
	/* 0x40 */ 0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307,
	/* 0x48 */ 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	/* 0x50 */ 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a,
	/* 0x58 */ 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/* Tables 2 and 3: altgr. */
static const u_short altgr_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200,
	/* 0x08 */ 0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200,
	/* 0x10 */ 0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
	/* 0x18 */ 0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73,
	/* 0x20 */ 0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200,
	/* 0x28 */ 0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76,
	/* 0x30 */ 0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510,
	/* 0x40 */ 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911,
	/* 0x48 */ 0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b,
	/* 0x50 */ 0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516,
	/* 0x58 */ 0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/* Table 4: control. */
static const u_short ctrl_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e,
	/* 0x08 */ 0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200,
	/* 0x10 */ 0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009,
	/* 0x18 */ 0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013,
	/* 0x20 */ 0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200,
	/* 0x28 */ 0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016,
	/* 0x30 */ 0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
	/* 0x40 */ 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307,
	/* 0x48 */ 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	/* 0x50 */ 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a,
	/* 0x58 */ 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/*
 * Key maps selected by kb_table; slot 3 reuses the altgr map.
 * NOTE(review): presumably slot 3 stands in for Linux's shift+altgr
 * table -- confirm against the Linux keyboard headers.
 */
const u_short * const linux_keytabs[] = {
	plain_map,
	shift_map,
	altgr_map,
	altgr_map,
	ctrl_map
};
#endif
761
762 static struct biosdisk_info *
763 fd2biosinfo(p, fp)
764 struct proc *p;
765 struct file *fp;
766 {
767 struct vnode *vp;
768 const char *blkname;
769 char diskname[16];
770 int i;
771 struct nativedisk_info *nip;
772 struct disklist *dl = i386_alldisks;
773
774 if (fp->f_type != DTYPE_VNODE)
775 return NULL;
776 vp = (struct vnode *)fp->f_data;
777
778 if (vp->v_type != VBLK)
779 return NULL;
780
781 blkname = devsw_blk2name(major(vp->v_rdev));
782 snprintf(diskname, sizeof diskname, "%s%u", blkname,
783 DISKUNIT(vp->v_rdev));
784
785 for (i = 0; i < dl->dl_nnativedisks; i++) {
786 nip = &dl->dl_nativedisks[i];
787 if (strcmp(diskname, nip->ni_devname))
788 continue;
789 if (nip->ni_nmatches != 0)
790 return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
791 }
792
793 return NULL;
794 }
795
796
797 /*
798 * We come here in a last attempt to satisfy a Linux ioctl() call
799 */
800 int
801 linux_machdepioctl(p, v, retval)
802 struct proc *p;
803 void *v;
804 register_t *retval;
805 {
806 struct linux_sys_ioctl_args /* {
807 syscallarg(int) fd;
808 syscallarg(u_long) com;
809 syscallarg(caddr_t) data;
810 } */ *uap = v;
811 struct sys_ioctl_args bia;
812 u_long com;
813 int error, error1;
814 #if (NWSDISPLAY > 0)
815 struct vt_mode lvt;
816 caddr_t bvtp, sg;
817 struct kbentry kbe;
818 #endif
819 struct linux_hd_geometry hdg;
820 struct linux_hd_big_geometry hdg_big;
821 struct biosdisk_info *bip;
822 struct filedesc *fdp;
823 struct file *fp;
824 int fd;
825 struct disklabel label, *labp;
826 struct partinfo partp;
827 int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
828 u_long start, biostotal, realtotal;
829 u_char heads, sectors;
830 u_int cylinders;
831 struct ioctl_pt pt;
832
833 fd = SCARG(uap, fd);
834 SCARG(&bia, fd) = fd;
835 SCARG(&bia, data) = SCARG(uap, data);
836 com = SCARG(uap, com);
837
838 fdp = p->p_fd;
839
840 if ((fp = fd_getfile(fdp, fd)) == NULL)
841 return (EBADF);
842
843 switch (com) {
844 #if (NWSDISPLAY > 0)
845 case LINUX_KDGKBMODE:
846 com = KDGKBMODE;
847 break;
848 case LINUX_KDSKBMODE:
849 com = KDSKBMODE;
850 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
851 SCARG(&bia, data) = (caddr_t)K_RAW;
852 break;
853 case LINUX_KIOCSOUND:
854 SCARG(&bia, data) =
855 (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
856 /* fall through */
857 case LINUX_KDMKTONE:
858 com = KDMKTONE;
859 break;
860 case LINUX_KDSETMODE:
861 com = KDSETMODE;
862 break;
863 case LINUX_KDGETMODE:
864 /* KD_* values are equal to the wscons numbers */
865 com = WSDISPLAYIO_GMODE;
866 break;
867 case LINUX_KDENABIO:
868 com = KDENABIO;
869 break;
870 case LINUX_KDDISABIO:
871 com = KDDISABIO;
872 break;
873 case LINUX_KDGETLED:
874 com = KDGETLED;
875 break;
876 case LINUX_KDSETLED:
877 com = KDSETLED;
878 break;
879 case LINUX_VT_OPENQRY:
880 com = VT_OPENQRY;
881 break;
882 case LINUX_VT_GETMODE:
883 SCARG(&bia, com) = VT_GETMODE;
884 if ((error = sys_ioctl(p, &bia, retval)))
885 return error;
886 if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
887 sizeof (struct vt_mode))))
888 return error;
889 lvt.relsig = native_to_linux_signo[lvt.relsig];
890 lvt.acqsig = native_to_linux_signo[lvt.acqsig];
891 lvt.frsig = native_to_linux_signo[lvt.frsig];
892 return copyout((caddr_t)&lvt, SCARG(uap, data),
893 sizeof (struct vt_mode));
894 case LINUX_VT_SETMODE:
895 com = VT_SETMODE;
896 if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
897 sizeof (struct vt_mode))))
898 return error;
899 lvt.relsig = linux_to_native_signo[lvt.relsig];
900 lvt.acqsig = linux_to_native_signo[lvt.acqsig];
901 lvt.frsig = linux_to_native_signo[lvt.frsig];
902 sg = stackgap_init(p, 0);
903 bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
904 if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
905 return error;
906 SCARG(&bia, data) = bvtp;
907 break;
908 case LINUX_VT_DISALLOCATE:
909 /* XXX should use WSDISPLAYIO_DELSCREEN */
910 return 0;
911 case LINUX_VT_RELDISP:
912 com = VT_RELDISP;
913 break;
914 case LINUX_VT_ACTIVATE:
915 com = VT_ACTIVATE;
916 break;
917 case LINUX_VT_WAITACTIVE:
918 com = VT_WAITACTIVE;
919 break;
920 case LINUX_VT_GETSTATE:
921 com = VT_GETSTATE;
922 break;
923 case LINUX_KDGKBTYPE:
924 /* This is what Linux does. */
925 return (subyte(SCARG(uap, data), KB_101));
926 case LINUX_KDGKBENT:
927 /*
928 * The Linux KDGKBENT ioctl is different from the
929 * SYSV original. So we handle it in machdep code.
930 * XXX We should use keyboard mapping information
931 * from wsdisplay, but this would be expensive.
932 */
933 if ((error = copyin(SCARG(uap, data), &kbe,
934 sizeof(struct kbentry))))
935 return (error);
936 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
937 || kbe.kb_index >= NR_KEYS)
938 return (EINVAL);
939 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
940 return (copyout(&kbe, SCARG(uap, data),
941 sizeof(struct kbentry)));
942 #endif
943 case LINUX_HDIO_GETGEO:
944 case LINUX_HDIO_GETGEO_BIG:
945 /*
946 * Try to mimic Linux behaviour: return the BIOS geometry
947 * if possible (extending its # of cylinders if it's beyond
948 * the 1023 limit), fall back to the MI geometry (i.e.
949 * the real geometry) if not found, by returning an
950 * error. See common/linux_hdio.c
951 */
952 FILE_USE(fp);
953 bip = fd2biosinfo(p, fp);
954 ioctlf = fp->f_ops->fo_ioctl;
955 error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
956 error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
957 FILE_UNUSE(fp, p);
958 if (error != 0 && error1 != 0)
959 return error1;
960 labp = error != 0 ? &label : partp.disklab;
961 start = error1 != 0 ? partp.part->p_offset : 0;
962 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
963 && bip->bi_cyl != 0) {
964 heads = bip->bi_head;
965 sectors = bip->bi_sec;
966 cylinders = bip->bi_cyl;
967 biostotal = heads * sectors * cylinders;
968 realtotal = labp->d_ntracks * labp->d_nsectors *
969 labp->d_ncylinders;
970 if (realtotal > biostotal)
971 cylinders = realtotal / (heads * sectors);
972 } else {
973 heads = labp->d_ntracks;
974 cylinders = labp->d_ncylinders;
975 sectors = labp->d_nsectors;
976 }
977 if (com == LINUX_HDIO_GETGEO) {
978 hdg.start = start;
979 hdg.heads = heads;
980 hdg.cylinders = cylinders;
981 hdg.sectors = sectors;
982 return copyout(&hdg, SCARG(uap, data), sizeof hdg);
983 } else {
984 hdg_big.start = start;
985 hdg_big.heads = heads;
986 hdg_big.cylinders = cylinders;
987 hdg_big.sectors = sectors;
988 return copyout(&hdg_big, SCARG(uap, data),
989 sizeof hdg_big);
990 }
991
992 default:
993 /*
994 * Unknown to us. If it's on a device, just pass it through
995 * using PTIOCLINUX, the device itself might be able to
996 * make some sense of it.
997 * XXX hack: if the function returns EJUSTRETURN,
998 * it has stuffed a sysctl return value in pt.data.
999 */
1000 FILE_USE(fp);
1001 ioctlf = fp->f_ops->fo_ioctl;
1002 pt.com = SCARG(uap, com);
1003 pt.data = SCARG(uap, data);
1004 error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
1005 FILE_UNUSE(fp, p);
1006 if (error == EJUSTRETURN) {
1007 retval[0] = (register_t)pt.data;
1008 error = 0;
1009 }
1010
1011 if (error == ENOTTY)
1012 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1013 com));
1014 return error;
1015 }
1016 SCARG(&bia, com) = com;
1017 return sys_ioctl(p, &bia, retval);
1018 }
1019
1020 /*
1021 * Set I/O permissions for a process. Just set the maximum level
1022 * right away (ignoring the argument), otherwise we would have
1023 * to rely on I/O permission maps, which are not implemented.
1024 */
1025 int
1026 linux_sys_iopl(p, v, retval)
1027 struct proc *p;
1028 void *v;
1029 register_t *retval;
1030 {
1031 #if 0
1032 struct linux_sys_iopl_args /* {
1033 syscallarg(int) level;
1034 } */ *uap = v;
1035 #endif
1036 struct trapframe *fp = p->p_md.md_regs;
1037
1038 if (suser(p->p_ucred, &p->p_acflag) != 0)
1039 return EPERM;
1040 fp->tf_eflags |= PSL_IOPL;
1041 *retval = 0;
1042 return 0;
1043 }
1044
1045 /*
1046 * See above. If a root process tries to set access to an I/O port,
1047 * just let it have the whole range.
1048 */
1049 int
1050 linux_sys_ioperm(p, v, retval)
1051 struct proc *p;
1052 void *v;
1053 register_t *retval;
1054 {
1055 struct linux_sys_ioperm_args /* {
1056 syscallarg(unsigned int) lo;
1057 syscallarg(unsigned int) hi;
1058 syscallarg(int) val;
1059 } */ *uap = v;
1060 struct trapframe *fp = p->p_md.md_regs;
1061
1062 if (suser(p->p_ucred, &p->p_acflag) != 0)
1063 return EPERM;
1064 if (SCARG(uap, val))
1065 fp->tf_eflags |= PSL_IOPL;
1066 *retval = 0;
1067 return 0;
1068 }
1069