linux_machdep.c revision 1.156 1 /* $NetBSD: linux_machdep.c,v 1.156 2014/01/26 19:16:17 dsl Exp $ */
2
3 /*-
4 * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Frank van der Linden, and by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.156 2014/01/26 19:16:17 dsl Exp $");
34
35 #if defined(_KERNEL_OPT)
36 #include "opt_vm86.h"
37 #include "opt_user_ldt.h"
38 #endif
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/signalvar.h>
43 #include <sys/kernel.h>
44 #include <sys/proc.h>
45 #include <sys/buf.h>
46 #include <sys/reboot.h>
47 #include <sys/conf.h>
48 #include <sys/exec.h>
49 #include <sys/file.h>
50 #include <sys/callout.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/msgbuf.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/device.h>
57 #include <sys/syscallargs.h>
58 #include <sys/filedesc.h>
59 #include <sys/exec_elf.h>
60 #include <sys/disklabel.h>
61 #include <sys/ioctl.h>
62 #include <sys/wait.h>
63 #include <sys/kauth.h>
64 #include <sys/kmem.h>
65
66 #include <miscfs/specfs/specdev.h>
67
68 #include <compat/linux/common/linux_types.h>
69 #include <compat/linux/common/linux_signal.h>
70 #include <compat/linux/common/linux_util.h>
71 #include <compat/linux/common/linux_ioctl.h>
72 #include <compat/linux/common/linux_hdio.h>
73 #include <compat/linux/common/linux_exec.h>
74 #include <compat/linux/common/linux_machdep.h>
75 #include <compat/linux/common/linux_errno.h>
76
77 #include <compat/linux/linux_syscallargs.h>
78
79 #include <sys/cpu.h>
80 #include <machine/cpufunc.h>
81 #include <machine/psl.h>
82 #include <machine/reg.h>
83 #include <machine/segments.h>
84 #include <machine/specialreg.h>
85 #include <machine/sysarch.h>
86 #include <machine/vm86.h>
87 #include <machine/vmparam.h>
88
89 /*
90 * To see whether wscons is configured (for virtual console ioctl calls).
91 */
92 #if defined(_KERNEL_OPT)
93 #include "wsdisplay.h"
94 #endif
95 #if (NWSDISPLAY > 0)
96 #include <dev/wscons/wsconsio.h>
97 #include <dev/wscons/wsdisplay_usl_io.h>
98 #if defined(_KERNEL_OPT)
99 #include "opt_xserver.h"
100 #endif
101 #endif
102
/*
 * Debug tracing hook.  DPRINTF((fmt, ...)) hands its parenthesised
 * argument list to uprintf when DEBUG_LINUX is defined and expands to
 * nothing otherwise.
 */
#if defined(DEBUG_LINUX)
#define	DPRINTF(a)	uprintf a
#else
#define	DPRINTF(a)
#endif
108
109 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
110 extern struct disklist *x86_alldisks;
111 static void linux_save_ucontext(struct lwp *, struct trapframe *,
112 const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
113 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
114 const sigset_t *, struct linux_sigcontext *);
115 static int linux_restore_sigcontext(struct lwp *,
116 struct linux_sigcontext *, register_t *);
117 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
118 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
119
120 extern char linux_sigcode[], linux_rt_sigcode[];
121
122 /*
123 * Deal with some i386-specific things in the Linux emulation code.
124 */
125
126 void
127 linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack)
128 {
129 struct pcb *pcb = lwp_getpcb(l);
130 struct trapframe *tf;
131
132 /* If we were using the FPU, forget about it. */
133 if (pcb->pcb_fpcpu != NULL)
134 fpusave_lwp(l, false);
135
136
137 #ifdef USER_LDT
138 pmap_ldt_cleanup(l);
139 #endif
140
141 l->l_md.md_flags &= ~MDL_USEDFPU;
142
143 if (i386_use_fxsave) {
144 pcb->pcb_savefpu.sv_xmm.fx_cw = __Linux_NPXCW__;
145 pcb->pcb_savefpu.sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
146 } else
147 pcb->pcb_savefpu.sv_87.s87_cw = __Linux_NPXCW__;
148
149 tf = l->l_md.md_regs;
150 tf->tf_gs = 0;
151 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
152 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
153 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
154 tf->tf_edi = 0;
155 tf->tf_esi = 0;
156 tf->tf_ebp = 0;
157 tf->tf_ebx = l->l_proc->p_psstrp;
158 tf->tf_edx = 0;
159 tf->tf_ecx = 0;
160 tf->tf_eax = 0;
161 tf->tf_eip = epp->ep_entry;
162 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
163 tf->tf_eflags = PSL_USERSET;
164 tf->tf_esp = stack;
165 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
166 }
167
168 /*
169 * Send an interrupt to process.
170 *
171 * Stack is set up to allow sigcode stored
172 * in u. to call routine, followed by kcall
173 * to sigreturn routine below. After sigreturn
174 * resets the signal mask, the stack, and the
175 * frame pointer, it returns to the user
176 * specified pc, psl.
177 */
178
179 void
180 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
181 {
182 if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
183 linux_rt_sendsig(ksi, mask);
184 else
185 linux_old_sendsig(ksi, mask);
186 }
187
188
189 static void
190 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
191 {
192 uc->uc_flags = 0;
193 uc->uc_link = NULL;
194 native_to_linux_sigaltstack(&uc->uc_stack, sas);
195 linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
196 native_to_linux_sigset(&uc->uc_sigmask, mask);
197 (void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
198 }
199
200 static void
201 linux_save_sigcontext(struct lwp *l, struct trapframe *tf,
202 const sigset_t *mask, struct linux_sigcontext *sc)
203 {
204 struct pcb *pcb = lwp_getpcb(l);
205
206 /* Save register context. */
207 #ifdef VM86
208 if (tf->tf_eflags & PSL_VM) {
209 sc->sc_gs = tf->tf_vm86_gs;
210 sc->sc_fs = tf->tf_vm86_fs;
211 sc->sc_es = tf->tf_vm86_es;
212 sc->sc_ds = tf->tf_vm86_ds;
213 sc->sc_eflags = get_vflags(l);
214 } else
215 #endif
216 {
217 sc->sc_gs = tf->tf_gs;
218 sc->sc_fs = tf->tf_fs;
219 sc->sc_es = tf->tf_es;
220 sc->sc_ds = tf->tf_ds;
221 sc->sc_eflags = tf->tf_eflags;
222 }
223 sc->sc_edi = tf->tf_edi;
224 sc->sc_esi = tf->tf_esi;
225 sc->sc_esp = tf->tf_esp;
226 sc->sc_ebp = tf->tf_ebp;
227 sc->sc_ebx = tf->tf_ebx;
228 sc->sc_edx = tf->tf_edx;
229 sc->sc_ecx = tf->tf_ecx;
230 sc->sc_eax = tf->tf_eax;
231 sc->sc_eip = tf->tf_eip;
232 sc->sc_cs = tf->tf_cs;
233 sc->sc_esp_at_signal = tf->tf_esp;
234 sc->sc_ss = tf->tf_ss;
235 sc->sc_err = tf->tf_err;
236 sc->sc_trapno = tf->tf_trapno;
237 sc->sc_cr2 = pcb->pcb_cr2;
238 sc->sc_387 = NULL;
239
240 /* Save signal stack. */
241 /* Linux doesn't save the onstack flag in sigframe */
242
243 /* Save signal mask. */
244 native_to_linux_old_sigset(&sc->sc_mask, mask);
245 }
246
247 static void
248 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
249 {
250 struct lwp *l = curlwp;
251 struct proc *p = l->l_proc;
252 struct trapframe *tf;
253 struct linux_rt_sigframe *fp, frame;
254 int onstack, error;
255 int sig = ksi->ksi_signo;
256 sig_t catcher = SIGACTION(p, sig).sa_handler;
257 struct sigaltstack *sas = &l->l_sigstk;
258
259 tf = l->l_md.md_regs;
260 /* Do we need to jump onto the signal stack? */
261 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
262 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
263
264
265 /* Allocate space for the signal handler context. */
266 if (onstack)
267 fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
268 sas->ss_size);
269 else
270 fp = (struct linux_rt_sigframe *)tf->tf_esp;
271 fp--;
272
273 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
274 onstack, fp, sig, tf->tf_eip,
275 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
276
277 /* Build stack frame for signal trampoline. */
278 frame.sf_handler = catcher;
279 frame.sf_sig = native_to_linux_signo[sig];
280 frame.sf_sip = &fp->sf_si;
281 frame.sf_ucp = &fp->sf_uc;
282
283 /*
284 * XXX: the following code assumes that the constants for
285 * siginfo are the same between linux and NetBSD.
286 */
287 native_to_linux_siginfo(&frame.sf_si, &ksi->ksi_info);
288
289 /* Save register context. */
290 linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
291 sendsig_reset(l, sig);
292
293 mutex_exit(p->p_lock);
294 error = copyout(&frame, fp, sizeof(frame));
295 mutex_enter(p->p_lock);
296
297 if (error != 0) {
298 /*
299 * Process has trashed its stack; give it an illegal
300 * instruction to halt it in its tracks.
301 */
302 sigexit(l, SIGILL);
303 /* NOTREACHED */
304 }
305
306 /*
307 * Build context to run handler in.
308 */
309 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
310 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
311 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
312 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
313 (linux_rt_sigcode - linux_sigcode);
314 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
315 tf->tf_eflags &= ~PSL_CLEARSIG;
316 tf->tf_esp = (int)fp;
317 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
318
319 /* Remember that we're now on the signal stack. */
320 if (onstack)
321 sas->ss_flags |= SS_ONSTACK;
322 }
323
324 static void
325 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
326 {
327 struct lwp *l = curlwp;
328 struct proc *p = l->l_proc;
329 struct trapframe *tf;
330 struct linux_sigframe *fp, frame;
331 int onstack, error;
332 int sig = ksi->ksi_signo;
333 sig_t catcher = SIGACTION(p, sig).sa_handler;
334 struct sigaltstack *sas = &l->l_sigstk;
335
336 tf = l->l_md.md_regs;
337
338 /* Do we need to jump onto the signal stack? */
339 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
340 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
341
342 /* Allocate space for the signal handler context. */
343 if (onstack)
344 fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
345 sas->ss_size);
346 else
347 fp = (struct linux_sigframe *)tf->tf_esp;
348 fp--;
349
350 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
351 onstack, fp, sig, tf->tf_eip,
352 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
353
354 /* Build stack frame for signal trampoline. */
355 frame.sf_handler = catcher;
356 frame.sf_sig = native_to_linux_signo[sig];
357
358 linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
359 sendsig_reset(l, sig);
360
361 mutex_exit(p->p_lock);
362 error = copyout(&frame, fp, sizeof(frame));
363 mutex_enter(p->p_lock);
364
365 if (error != 0) {
366 /*
367 * Process has trashed its stack; give it an illegal
368 * instruction to halt it in its tracks.
369 */
370 sigexit(l, SIGILL);
371 /* NOTREACHED */
372 }
373
374 /*
375 * Build context to run handler in.
376 */
377 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
378 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
379 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
380 tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
381 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
382 tf->tf_eflags &= ~PSL_CLEARSIG;
383 tf->tf_esp = (int)fp;
384 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
385
386 /* Remember that we're now on the signal stack. */
387 if (onstack)
388 sas->ss_flags |= SS_ONSTACK;
389 }
390
391 /*
392 * System call to cleanup state after a signal
393 * has been taken. Reset signal mask and
394 * stack state from context left by sendsig (above).
395 * Return to previous pc and psl as specified by
396 * context left by sendsig. Check carefully to
397 * make sure that the user has not modified the
398 * psl to gain improper privileges or to cause
399 * a machine fault.
400 */
401 int
402 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
403 {
404 /* {
405 syscallarg(struct linux_ucontext *) ucp;
406 } */
407 struct linux_ucontext context, *ucp = SCARG(uap, ucp);
408 int error;
409
410 /*
411 * The trampoline code hands us the context.
412 * It is unsafe to keep track of it ourselves, in the event that a
413 * program jumps out of a signal handler.
414 */
415 if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
416 return error;
417
418 /* XXX XAX we can do better here by using more of the ucontext */
419 return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
420 }
421
422 int
423 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
424 {
425 /* {
426 syscallarg(struct linux_sigcontext *) scp;
427 } */
428 struct linux_sigcontext context, *scp = SCARG(uap, scp);
429 int error;
430
431 /*
432 * The trampoline code hands us the context.
433 * It is unsafe to keep track of it ourselves, in the event that a
434 * program jumps out of a signal handler.
435 */
436 if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
437 return error;
438 return linux_restore_sigcontext(l, &context, retval);
439 }
440
441 static int
442 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
443 register_t *retval)
444 {
445 struct proc *p = l->l_proc;
446 struct sigaltstack *sas = &l->l_sigstk;
447 struct trapframe *tf;
448 sigset_t mask;
449 ssize_t ss_gap;
450
451 /* Restore register context. */
452 tf = l->l_md.md_regs;
453 DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
454
455 #ifdef VM86
456 if (scp->sc_eflags & PSL_VM) {
457 void syscall_vm86(struct trapframe *);
458
459 tf->tf_vm86_gs = scp->sc_gs;
460 tf->tf_vm86_fs = scp->sc_fs;
461 tf->tf_vm86_es = scp->sc_es;
462 tf->tf_vm86_ds = scp->sc_ds;
463 set_vflags(l, scp->sc_eflags);
464 p->p_md.md_syscall = syscall_vm86;
465 } else
466 #endif
467 {
468 /*
469 * Check for security violations. If we're returning to
470 * protected mode, the CPU will validate the segment registers
471 * automatically and generate a trap on violations. We handle
472 * the trap, rather than doing all of the checking here.
473 */
474 if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
475 !USERMODE(scp->sc_cs, scp->sc_eflags))
476 return EINVAL;
477
478 tf->tf_gs = scp->sc_gs;
479 tf->tf_fs = scp->sc_fs;
480 tf->tf_es = scp->sc_es;
481 tf->tf_ds = scp->sc_ds;
482 #ifdef VM86
483 if (tf->tf_eflags & PSL_VM)
484 (*p->p_emul->e_syscall_intern)(p);
485 #endif
486 tf->tf_eflags = scp->sc_eflags;
487 }
488 tf->tf_edi = scp->sc_edi;
489 tf->tf_esi = scp->sc_esi;
490 tf->tf_ebp = scp->sc_ebp;
491 tf->tf_ebx = scp->sc_ebx;
492 tf->tf_edx = scp->sc_edx;
493 tf->tf_ecx = scp->sc_ecx;
494 tf->tf_eax = scp->sc_eax;
495 tf->tf_eip = scp->sc_eip;
496 tf->tf_cs = scp->sc_cs;
497 tf->tf_esp = scp->sc_esp_at_signal;
498 tf->tf_ss = scp->sc_ss;
499
500 /* Restore signal stack. */
501 /*
502 * Linux really does it this way; it doesn't have space in sigframe
503 * to save the onstack flag.
504 */
505 mutex_enter(p->p_lock);
506 ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
507 if (ss_gap >= 0 && ss_gap < sas->ss_size)
508 sas->ss_flags |= SS_ONSTACK;
509 else
510 sas->ss_flags &= ~SS_ONSTACK;
511
512 /* Restore signal mask. */
513 linux_old_to_native_sigset(&mask, &scp->sc_mask);
514 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
515 mutex_exit(p->p_lock);
516
517 DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
518 return EJUSTRETURN;
519 }
520
521 #ifdef USER_LDT
522
523 static int
524 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
525 register_t *retval)
526 {
527 struct x86_get_ldt_args gl;
528 int error;
529 union descriptor *ldt_buf;
530 size_t sz;
531
532 /*
533 * I've checked the linux code - this function is asymetric with
534 * linux_write_ldt, and returns raw ldt entries.
535 * NB, the code I saw zerod the spare parts of the user buffer.
536 */
537
538 DPRINTF(("linux_read_ldt!"));
539
540 sz = 8192 * sizeof(*ldt_buf);
541 ldt_buf = kmem_zalloc(sz, KM_SLEEP);
542 gl.start = 0;
543 gl.desc = NULL;
544 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
545 error = x86_get_ldt1(l, &gl, ldt_buf);
546 /* NB gl.num might have changed */
547 if (error == 0) {
548 *retval = gl.num * sizeof *ldt;
549 error = copyout(ldt_buf, SCARG(uap, ptr),
550 gl.num * sizeof *ldt_buf);
551 }
552 kmem_free(ldt_buf, sz);
553
554 return error;
555 }
556
557 struct linux_ldt_info {
558 u_int entry_number;
559 u_long base_addr;
560 u_int limit;
561 u_int seg_32bit:1;
562 u_int contents:2;
563 u_int read_exec_only:1;
564 u_int limit_in_pages:1;
565 u_int seg_not_present:1;
566 u_int useable:1;
567 };
568
569 static int
570 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
571 int oldmode)
572 {
573 struct linux_ldt_info ldt_info;
574 union descriptor d;
575 struct x86_set_ldt_args sl;
576 int error;
577
578 DPRINTF(("linux_write_ldt %d\n", oldmode));
579 if (SCARG(uap, bytecount) != sizeof(ldt_info))
580 return (EINVAL);
581 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
582 return error;
583 if (ldt_info.entry_number >= 8192)
584 return (EINVAL);
585 if (ldt_info.contents == 3) {
586 if (oldmode)
587 return (EINVAL);
588 if (ldt_info.seg_not_present)
589 return (EINVAL);
590 }
591
592 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
593 (oldmode || (ldt_info.contents == 0 &&
594 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
595 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
596 ldt_info.useable == 0))) {
597 /* this means you should zero the ldt */
598 (void)memset(&d, 0, sizeof(d));
599 } else {
600 d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
601 d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
602 d.sd.sd_lolimit = ldt_info.limit & 0xffff;
603 d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
604 d.sd.sd_type = 16 | (ldt_info.contents << 2) |
605 (!ldt_info.read_exec_only << 1);
606 d.sd.sd_dpl = SEL_UPL;
607 d.sd.sd_p = !ldt_info.seg_not_present;
608 d.sd.sd_def32 = ldt_info.seg_32bit;
609 d.sd.sd_gran = ldt_info.limit_in_pages;
610 if (!oldmode)
611 d.sd.sd_xx = ldt_info.useable;
612 else
613 d.sd.sd_xx = 0;
614 }
615 sl.start = ldt_info.entry_number;
616 sl.desc = NULL;
617 sl.num = 1;
618
619 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
620 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
621
622 return x86_set_ldt1(l, &sl, &d);
623 }
624
625 #endif /* USER_LDT */
626
627 int
628 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
629 {
630 /* {
631 syscallarg(int) func;
632 syscallarg(void *) ptr;
633 syscallarg(size_t) bytecount;
634 } */
635
636 switch (SCARG(uap, func)) {
637 #ifdef USER_LDT
638 case 0:
639 return linux_read_ldt(l, (const void *)uap, retval);
640 case 1:
641 return linux_write_ldt(l, (const void *)uap, 1);
642 case 2:
643 #ifdef notyet
644 return linux_read_default_ldt(l, (const void *)uap, retval);
645 #else
646 return (ENOSYS);
647 #endif
648 case 0x11:
649 return linux_write_ldt(l, (const void *)uap, 0);
650 #endif /* USER_LDT */
651
652 default:
653 return (ENOSYS);
654 }
655 }
656
657 /*
658 * XXX Pathetic hack to make svgalib work. This will fake the major
659 * device number of an opened VT so that svgalib likes it. grmbl.
660 * Should probably do it 'wrong the right way' and use a mapping
661 * array for all major device numbers, and map linux_mknod too.
662 */
663 dev_t
664 linux_fakedev(dev_t dev, int raw)
665 {
666 extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
667 const struct cdevsw *cd = cdevsw_lookup(dev);
668
669 if (raw) {
670 #if (NWSDISPLAY > 0)
671 extern const struct cdevsw wsdisplay_cdevsw;
672 if (cd == &wsdisplay_cdevsw)
673 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
674 #endif
675 }
676
677 if (cd == &ptc_cdevsw)
678 return makedev(LINUX_PTC_MAJOR, minor(dev));
679 if (cd == &pts_cdevsw)
680 return makedev(LINUX_PTS_MAJOR, minor(dev));
681
682 return dev;
683 }
684
685 #if (NWSDISPLAY > 0)
686 /*
687 * That's not complete, but enough to get an X server running.
688 */
689 #define NR_KEYS 128
690 static const u_short plain_map[NR_KEYS] = {
691 0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036,
692 0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009,
693 0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
694 0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73,
695 0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b,
696 0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76,
697 0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c,
698 0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
699 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307,
700 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
701 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a,
702 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
703 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
704 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
705 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
706 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
707 }, shift_map[NR_KEYS] = {
708 0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e,
709 0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009,
710 0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49,
711 0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53,
712 0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a,
713 0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56,
714 0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c,
715 0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e,
716 0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307,
717 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
718 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a,
719 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
720 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
721 0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116,
722 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
723 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
724 }, altgr_map[NR_KEYS] = {
725 0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200,
726 0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200,
727 0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
728 0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73,
729 0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200,
730 0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76,
731 0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c,
732 0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510,
733 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911,
734 0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b,
735 0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516,
736 0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
737 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
738 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
739 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
740 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
741 }, ctrl_map[NR_KEYS] = {
742 0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e,
743 0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200,
744 0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009,
745 0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013,
746 0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200,
747 0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016,
748 0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c,
749 0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
750 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307,
751 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
752 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a,
753 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
754 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
755 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
756 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
757 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
758 };
759
760 const u_short * const linux_keytabs[] = {
761 plain_map, shift_map, altgr_map, altgr_map, ctrl_map
762 };
763 #endif
764
765 static struct biosdisk_info *
766 fd2biosinfo(struct proc *p, struct file *fp)
767 {
768 struct vnode *vp;
769 const char *blkname;
770 char diskname[16];
771 int i;
772 struct nativedisk_info *nip;
773 struct disklist *dl = x86_alldisks;
774
775 if (fp->f_type != DTYPE_VNODE)
776 return NULL;
777 vp = (struct vnode *)fp->f_data;
778
779 if (vp->v_type != VBLK)
780 return NULL;
781
782 blkname = devsw_blk2name(major(vp->v_rdev));
783 snprintf(diskname, sizeof diskname, "%s%llu", blkname,
784 (unsigned long long)DISKUNIT(vp->v_rdev));
785
786 for (i = 0; i < dl->dl_nnativedisks; i++) {
787 nip = &dl->dl_nativedisks[i];
788 if (strcmp(diskname, nip->ni_devname))
789 continue;
790 if (nip->ni_nmatches != 0)
791 return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
792 }
793
794 return NULL;
795 }
796
797
798 /*
799 * We come here in a last attempt to satisfy a Linux ioctl() call
800 */
801 int
802 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
803 {
804 /* {
805 syscallarg(int) fd;
806 syscallarg(u_long) com;
807 syscallarg(void *) data;
808 } */
809 struct sys_ioctl_args bia;
810 u_long com;
811 int error, error1;
812 #if (NWSDISPLAY > 0)
813 struct vt_mode lvt;
814 struct kbentry kbe;
815 #endif
816 struct linux_hd_geometry hdg;
817 struct linux_hd_big_geometry hdg_big;
818 struct biosdisk_info *bip;
819 file_t *fp;
820 int fd;
821 struct disklabel label, *labp;
822 struct partinfo partp;
823 int (*ioctlf)(struct file *, u_long, void *);
824 u_long start, biostotal, realtotal;
825 u_char heads, sectors;
826 u_int cylinders;
827 struct ioctl_pt pt;
828
829 fd = SCARG(uap, fd);
830 SCARG(&bia, fd) = fd;
831 SCARG(&bia, data) = SCARG(uap, data);
832 com = SCARG(uap, com);
833
834 if ((fp = fd_getfile(fd)) == NULL)
835 return (EBADF);
836
837 switch (com) {
838 #if (NWSDISPLAY > 0)
839 case LINUX_KDGKBMODE:
840 com = KDGKBMODE;
841 break;
842 case LINUX_KDSKBMODE:
843 com = KDSKBMODE;
844 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
845 SCARG(&bia, data) = (void *)K_RAW;
846 break;
847 case LINUX_KIOCSOUND:
848 SCARG(&bia, data) =
849 (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
850 /* fall through */
851 case LINUX_KDMKTONE:
852 com = KDMKTONE;
853 break;
854 case LINUX_KDSETMODE:
855 com = KDSETMODE;
856 break;
857 case LINUX_KDGETMODE:
858 /* KD_* values are equal to the wscons numbers */
859 com = WSDISPLAYIO_GMODE;
860 break;
861 case LINUX_KDENABIO:
862 com = KDENABIO;
863 break;
864 case LINUX_KDDISABIO:
865 com = KDDISABIO;
866 break;
867 case LINUX_KDGETLED:
868 com = KDGETLED;
869 break;
870 case LINUX_KDSETLED:
871 com = KDSETLED;
872 break;
873 case LINUX_VT_OPENQRY:
874 com = VT_OPENQRY;
875 break;
876 case LINUX_VT_GETMODE:
877 error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
878 if (error != 0)
879 goto out;
880 lvt.relsig = native_to_linux_signo[lvt.relsig];
881 lvt.acqsig = native_to_linux_signo[lvt.acqsig];
882 lvt.frsig = native_to_linux_signo[lvt.frsig];
883 error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
884 goto out;
885 case LINUX_VT_SETMODE:
886 error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
887 if (error != 0)
888 goto out;
889 lvt.relsig = linux_to_native_signo[lvt.relsig];
890 lvt.acqsig = linux_to_native_signo[lvt.acqsig];
891 lvt.frsig = linux_to_native_signo[lvt.frsig];
892 error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
893 goto out;
894 case LINUX_VT_DISALLOCATE:
895 /* XXX should use WSDISPLAYIO_DELSCREEN */
896 error = 0;
897 goto out;
898 case LINUX_VT_RELDISP:
899 com = VT_RELDISP;
900 break;
901 case LINUX_VT_ACTIVATE:
902 com = VT_ACTIVATE;
903 break;
904 case LINUX_VT_WAITACTIVE:
905 com = VT_WAITACTIVE;
906 break;
907 case LINUX_VT_GETSTATE:
908 com = VT_GETSTATE;
909 break;
910 case LINUX_KDGKBTYPE:
911 {
912 static const u_int8_t kb101 = KB_101;
913
914 /* This is what Linux does. */
915 error = copyout(&kb101, SCARG(uap, data), 1);
916 goto out;
917 }
918 case LINUX_KDGKBENT:
919 /*
920 * The Linux KDGKBENT ioctl is different from the
921 * SYSV original. So we handle it in machdep code.
922 * XXX We should use keyboard mapping information
923 * from wsdisplay, but this would be expensive.
924 */
925 if ((error = copyin(SCARG(uap, data), &kbe,
926 sizeof(struct kbentry))))
927 goto out;
928 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
929 || kbe.kb_index >= NR_KEYS) {
930 error = EINVAL;
931 goto out;
932 }
933 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
934 error = copyout(&kbe, SCARG(uap, data),
935 sizeof(struct kbentry));
936 goto out;
937 #endif
938 case LINUX_HDIO_GETGEO:
939 case LINUX_HDIO_GETGEO_BIG:
940 /*
941 * Try to mimic Linux behaviour: return the BIOS geometry
942 * if possible (extending its # of cylinders if it's beyond
943 * the 1023 limit), fall back to the MI geometry (i.e.
944 * the real geometry) if not found, by returning an
945 * error. See common/linux_hdio.c
946 */
947 bip = fd2biosinfo(curproc, fp);
948 ioctlf = fp->f_ops->fo_ioctl;
949 error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label);
950 error1 = ioctlf(fp, DIOCGPART, (void *)&partp);
951 if (error != 0 && error1 != 0) {
952 error = error1;
953 goto out;
954 }
955 labp = error != 0 ? &label : partp.disklab;
956 start = error1 != 0 ? partp.part->p_offset : 0;
957 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
958 && bip->bi_cyl != 0) {
959 heads = bip->bi_head;
960 sectors = bip->bi_sec;
961 cylinders = bip->bi_cyl;
962 biostotal = heads * sectors * cylinders;
963 realtotal = labp->d_ntracks * labp->d_nsectors *
964 labp->d_ncylinders;
965 if (realtotal > biostotal)
966 cylinders = realtotal / (heads * sectors);
967 } else {
968 heads = labp->d_ntracks;
969 cylinders = labp->d_ncylinders;
970 sectors = labp->d_nsectors;
971 }
972 if (com == LINUX_HDIO_GETGEO) {
973 hdg.start = start;
974 hdg.heads = heads;
975 hdg.cylinders = cylinders;
976 hdg.sectors = sectors;
977 error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
978 goto out;
979 } else {
980 hdg_big.start = start;
981 hdg_big.heads = heads;
982 hdg_big.cylinders = cylinders;
983 hdg_big.sectors = sectors;
984 error = copyout(&hdg_big, SCARG(uap, data),
985 sizeof hdg_big);
986 goto out;
987 }
988
989 default:
990 /*
991 * Unknown to us. If it's on a device, just pass it through
992 * using PTIOCLINUX, the device itself might be able to
993 * make some sense of it.
994 * XXX hack: if the function returns EJUSTRETURN,
995 * it has stuffed a sysctl return value in pt.data.
996 */
997 ioctlf = fp->f_ops->fo_ioctl;
998 pt.com = SCARG(uap, com);
999 pt.data = SCARG(uap, data);
1000 error = ioctlf(fp, PTIOCLINUX, &pt);
1001 if (error == EJUSTRETURN) {
1002 retval[0] = (register_t)pt.data;
1003 error = 0;
1004 }
1005
1006 if (error == ENOTTY) {
1007 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1008 com));
1009 }
1010 goto out;
1011 }
1012 SCARG(&bia, com) = com;
1013 error = sys_ioctl(curlwp, &bia, retval);
1014 out:
1015 fd_putfile(fd);
1016 return error;
1017 }
1018
1019 /*
1020 * Set I/O permissions for a process. Just set the maximum level
1021 * right away (ignoring the argument), otherwise we would have
1022 * to rely on I/O permission maps, which are not implemented.
1023 */
1024 int
1025 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
1026 {
1027 /* {
1028 syscallarg(int) level;
1029 } */
1030 struct trapframe *fp = l->l_md.md_regs;
1031
1032 if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
1033 NULL, NULL, NULL, NULL) != 0)
1034 return EPERM;
1035 fp->tf_eflags |= PSL_IOPL;
1036 *retval = 0;
1037 return 0;
1038 }
1039
1040 /*
1041 * See above. If a root process tries to set access to an I/O port,
1042 * just let it have the whole range.
1043 */
1044 int
1045 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
1046 {
1047 /* {
1048 syscallarg(unsigned int) lo;
1049 syscallarg(unsigned int) hi;
1050 syscallarg(int) val;
1051 } */
1052 struct trapframe *fp = l->l_md.md_regs;
1053
1054 if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
1055 KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
1056 NULL, NULL) != 0)
1057 return EPERM;
1058 if (SCARG(uap, val))
1059 fp->tf_eflags |= PSL_IOPL;
1060 *retval = 0;
1061 return 0;
1062 }
1063
1064 int
1065 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
1066 void *arg)
1067 {
1068 return 0;
1069 }
1070
1071 const char *
1072 linux_get_uname_arch(void)
1073 {
1074 static char uname_arch[5] = "i386";
1075
1076 if (uname_arch[1] == '3')
1077 uname_arch[1] += cpu_class;
1078 return uname_arch;
1079 }
1080