linux_machdep.c revision 1.159 1 /* $NetBSD: linux_machdep.c,v 1.159 2014/11/09 17:48:07 maxv Exp $ */
2
3 /*-
4 * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Frank van der Linden, and by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.159 2014/11/09 17:48:07 maxv Exp $");
34
35 #if defined(_KERNEL_OPT)
36 #include "opt_vm86.h"
37 #include "opt_user_ldt.h"
38 #endif
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/signalvar.h>
43 #include <sys/kernel.h>
44 #include <sys/proc.h>
45 #include <sys/buf.h>
46 #include <sys/reboot.h>
47 #include <sys/conf.h>
48 #include <sys/exec.h>
49 #include <sys/file.h>
50 #include <sys/callout.h>
51 #include <sys/mbuf.h>
52 #include <sys/msgbuf.h>
53 #include <sys/mount.h>
54 #include <sys/vnode.h>
55 #include <sys/device.h>
56 #include <sys/syscallargs.h>
57 #include <sys/filedesc.h>
58 #include <sys/exec_elf.h>
59 #include <sys/disklabel.h>
60 #include <sys/ioctl.h>
61 #include <sys/wait.h>
62 #include <sys/kauth.h>
63 #include <sys/kmem.h>
64
65 #include <miscfs/specfs/specdev.h>
66
67 #include <compat/linux/common/linux_types.h>
68 #include <compat/linux/common/linux_signal.h>
69 #include <compat/linux/common/linux_util.h>
70 #include <compat/linux/common/linux_ioctl.h>
71 #include <compat/linux/common/linux_hdio.h>
72 #include <compat/linux/common/linux_exec.h>
73 #include <compat/linux/common/linux_machdep.h>
74 #include <compat/linux/common/linux_errno.h>
75
76 #include <compat/linux/linux_syscallargs.h>
77
78 #include <sys/cpu.h>
79 #include <machine/cpufunc.h>
80 #include <machine/psl.h>
81 #include <machine/reg.h>
82 #include <machine/segments.h>
83 #include <machine/specialreg.h>
84 #include <machine/sysarch.h>
85 #include <machine/vm86.h>
86 #include <machine/vmparam.h>
87
88 #include <x86/fpu.h>
89
90 /*
91 * To see whether wscons is configured (for virtual console ioctl calls).
92 */
93 #if defined(_KERNEL_OPT)
94 #include "wsdisplay.h"
95 #endif
96 #if (NWSDISPLAY > 0)
97 #include <dev/wscons/wsconsio.h>
98 #include <dev/wscons/wsdisplay_usl_io.h>
99 #if defined(_KERNEL_OPT)
100 #include "opt_xserver.h"
101 #endif
102 #endif
103
/*
 * Debug tracing.  DPRINTF takes a single, fully parenthesised printf-style
 * argument list, e.g. DPRINTF(("x = %d\n", x)).  With DEBUG_LINUX defined it
 * expands to a uprintf call; otherwise it expands to nothing.
 */
#ifdef DEBUG_LINUX
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif

/* Helpers private to this file; definitions follow below. */
static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
/* Disk list consulted by fd2biosinfo(); defined outside this file. */
extern struct disklist *x86_alldisks;
static void linux_save_ucontext(struct lwp *, struct trapframe *,
    const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
static void linux_save_sigcontext(struct lwp *, struct trapframe *,
    const sigset_t *, struct linux_sigcontext *);
static int linux_restore_sigcontext(struct lwp *,
    struct linux_sigcontext *, register_t *);
static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);

/*
 * Signal trampoline symbols.  The distance between the two is used to
 * locate the rt trampoline relative to the process's ps_sigcode.
 */
extern char linux_sigcode[], linux_rt_sigcode[];
122
123 /*
124 * Deal with some i386-specific things in the Linux emulation code.
125 */
126
127 void
128 linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack)
129 {
130 struct trapframe *tf;
131
132 #ifdef USER_LDT
133 pmap_ldt_cleanup(l);
134 #endif
135
136 fpu_save_area_clear(l, __Linux_NPXCW__);
137
138 tf = l->l_md.md_regs;
139 tf->tf_gs = 0;
140 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
141 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
142 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
143 tf->tf_edi = 0;
144 tf->tf_esi = 0;
145 tf->tf_ebp = 0;
146 tf->tf_ebx = l->l_proc->p_psstrp;
147 tf->tf_edx = 0;
148 tf->tf_ecx = 0;
149 tf->tf_eax = 0;
150 tf->tf_eip = epp->ep_entry;
151 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
152 tf->tf_eflags = PSL_USERSET;
153 tf->tf_esp = stack;
154 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
155 }
156
157 /*
158 * Send an interrupt to process.
159 *
160 * Stack is set up to allow sigcode stored
161 * in u. to call routine, followed by kcall
162 * to sigreturn routine below. After sigreturn
163 * resets the signal mask, the stack, and the
164 * frame pointer, it returns to the user
165 * specified pc, psl.
166 */
167
168 void
169 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
170 {
171 if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
172 linux_rt_sendsig(ksi, mask);
173 else
174 linux_old_sendsig(ksi, mask);
175 }
176
177
178 static void
179 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
180 {
181 uc->uc_flags = 0;
182 uc->uc_link = NULL;
183 native_to_linux_sigaltstack(&uc->uc_stack, sas);
184 linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
185 native_to_linux_sigset(&uc->uc_sigmask, mask);
186 (void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
187 }
188
189 static void
190 linux_save_sigcontext(struct lwp *l, struct trapframe *tf,
191 const sigset_t *mask, struct linux_sigcontext *sc)
192 {
193 struct pcb *pcb = lwp_getpcb(l);
194
195 /* Save register context. */
196 #ifdef VM86
197 if (tf->tf_eflags & PSL_VM) {
198 sc->sc_gs = tf->tf_vm86_gs;
199 sc->sc_fs = tf->tf_vm86_fs;
200 sc->sc_es = tf->tf_vm86_es;
201 sc->sc_ds = tf->tf_vm86_ds;
202 sc->sc_eflags = get_vflags(l);
203 } else
204 #endif
205 {
206 sc->sc_gs = tf->tf_gs;
207 sc->sc_fs = tf->tf_fs;
208 sc->sc_es = tf->tf_es;
209 sc->sc_ds = tf->tf_ds;
210 sc->sc_eflags = tf->tf_eflags;
211 }
212 sc->sc_edi = tf->tf_edi;
213 sc->sc_esi = tf->tf_esi;
214 sc->sc_esp = tf->tf_esp;
215 sc->sc_ebp = tf->tf_ebp;
216 sc->sc_ebx = tf->tf_ebx;
217 sc->sc_edx = tf->tf_edx;
218 sc->sc_ecx = tf->tf_ecx;
219 sc->sc_eax = tf->tf_eax;
220 sc->sc_eip = tf->tf_eip;
221 sc->sc_cs = tf->tf_cs;
222 sc->sc_esp_at_signal = tf->tf_esp;
223 sc->sc_ss = tf->tf_ss;
224 sc->sc_err = tf->tf_err;
225 sc->sc_trapno = tf->tf_trapno;
226 sc->sc_cr2 = pcb->pcb_cr2;
227 sc->sc_387 = NULL;
228
229 /* Save signal stack. */
230 /* Linux doesn't save the onstack flag in sigframe */
231
232 /* Save signal mask. */
233 native_to_linux_old_sigset(&sc->sc_mask, mask);
234 }
235
236 static void
237 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
238 {
239 struct lwp *l = curlwp;
240 struct proc *p = l->l_proc;
241 struct trapframe *tf;
242 struct linux_rt_sigframe *fp, frame;
243 int onstack, error;
244 int sig = ksi->ksi_signo;
245 sig_t catcher = SIGACTION(p, sig).sa_handler;
246 struct sigaltstack *sas = &l->l_sigstk;
247
248 tf = l->l_md.md_regs;
249 /* Do we need to jump onto the signal stack? */
250 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
251 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
252
253
254 /* Allocate space for the signal handler context. */
255 if (onstack)
256 fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
257 sas->ss_size);
258 else
259 fp = (struct linux_rt_sigframe *)tf->tf_esp;
260 fp--;
261
262 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
263 onstack, fp, sig, tf->tf_eip,
264 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
265
266 /* Build stack frame for signal trampoline. */
267 frame.sf_handler = catcher;
268 frame.sf_sig = native_to_linux_signo[sig];
269 frame.sf_sip = &fp->sf_si;
270 frame.sf_ucp = &fp->sf_uc;
271
272 /*
273 * XXX: the following code assumes that the constants for
274 * siginfo are the same between linux and NetBSD.
275 */
276 native_to_linux_siginfo(&frame.sf_si, &ksi->ksi_info);
277
278 /* Save register context. */
279 linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
280 sendsig_reset(l, sig);
281
282 mutex_exit(p->p_lock);
283 error = copyout(&frame, fp, sizeof(frame));
284 mutex_enter(p->p_lock);
285
286 if (error != 0) {
287 /*
288 * Process has trashed its stack; give it an illegal
289 * instruction to halt it in its tracks.
290 */
291 sigexit(l, SIGILL);
292 /* NOTREACHED */
293 }
294
295 /*
296 * Build context to run handler in.
297 */
298 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
299 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
300 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
301 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
302 (linux_rt_sigcode - linux_sigcode);
303 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
304 tf->tf_eflags &= ~PSL_CLEARSIG;
305 tf->tf_esp = (int)fp;
306 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
307
308 /* Remember that we're now on the signal stack. */
309 if (onstack)
310 sas->ss_flags |= SS_ONSTACK;
311 }
312
313 static void
314 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
315 {
316 struct lwp *l = curlwp;
317 struct proc *p = l->l_proc;
318 struct trapframe *tf;
319 struct linux_sigframe *fp, frame;
320 int onstack, error;
321 int sig = ksi->ksi_signo;
322 sig_t catcher = SIGACTION(p, sig).sa_handler;
323 struct sigaltstack *sas = &l->l_sigstk;
324
325 tf = l->l_md.md_regs;
326
327 /* Do we need to jump onto the signal stack? */
328 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
329 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
330
331 /* Allocate space for the signal handler context. */
332 if (onstack)
333 fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
334 sas->ss_size);
335 else
336 fp = (struct linux_sigframe *)tf->tf_esp;
337 fp--;
338
339 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
340 onstack, fp, sig, tf->tf_eip,
341 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
342
343 /* Build stack frame for signal trampoline. */
344 frame.sf_handler = catcher;
345 frame.sf_sig = native_to_linux_signo[sig];
346
347 linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
348 sendsig_reset(l, sig);
349
350 mutex_exit(p->p_lock);
351 error = copyout(&frame, fp, sizeof(frame));
352 mutex_enter(p->p_lock);
353
354 if (error != 0) {
355 /*
356 * Process has trashed its stack; give it an illegal
357 * instruction to halt it in its tracks.
358 */
359 sigexit(l, SIGILL);
360 /* NOTREACHED */
361 }
362
363 /*
364 * Build context to run handler in.
365 */
366 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
367 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
368 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
369 tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
370 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
371 tf->tf_eflags &= ~PSL_CLEARSIG;
372 tf->tf_esp = (int)fp;
373 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
374
375 /* Remember that we're now on the signal stack. */
376 if (onstack)
377 sas->ss_flags |= SS_ONSTACK;
378 }
379
380 /*
381 * System call to cleanup state after a signal
382 * has been taken. Reset signal mask and
383 * stack state from context left by sendsig (above).
384 * Return to previous pc and psl as specified by
385 * context left by sendsig. Check carefully to
386 * make sure that the user has not modified the
387 * psl to gain improper privileges or to cause
388 * a machine fault.
389 */
390 int
391 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
392 {
393 /* {
394 syscallarg(struct linux_ucontext *) ucp;
395 } */
396 struct linux_ucontext context, *ucp = SCARG(uap, ucp);
397 int error;
398
399 /*
400 * The trampoline code hands us the context.
401 * It is unsafe to keep track of it ourselves, in the event that a
402 * program jumps out of a signal handler.
403 */
404 if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
405 return error;
406
407 /* XXX XAX we can do better here by using more of the ucontext */
408 return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
409 }
410
411 int
412 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
413 {
414 /* {
415 syscallarg(struct linux_sigcontext *) scp;
416 } */
417 struct linux_sigcontext context, *scp = SCARG(uap, scp);
418 int error;
419
420 /*
421 * The trampoline code hands us the context.
422 * It is unsafe to keep track of it ourselves, in the event that a
423 * program jumps out of a signal handler.
424 */
425 if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
426 return error;
427 return linux_restore_sigcontext(l, &context, retval);
428 }
429
430 static int
431 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
432 register_t *retval)
433 {
434 struct proc *p = l->l_proc;
435 struct sigaltstack *sas = &l->l_sigstk;
436 struct trapframe *tf;
437 sigset_t mask;
438 ssize_t ss_gap;
439
440 /* Restore register context. */
441 tf = l->l_md.md_regs;
442 DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
443
444 #ifdef VM86
445 if (scp->sc_eflags & PSL_VM) {
446 void syscall_vm86(struct trapframe *);
447
448 tf->tf_vm86_gs = scp->sc_gs;
449 tf->tf_vm86_fs = scp->sc_fs;
450 tf->tf_vm86_es = scp->sc_es;
451 tf->tf_vm86_ds = scp->sc_ds;
452 set_vflags(l, scp->sc_eflags);
453 p->p_md.md_syscall = syscall_vm86;
454 } else
455 #endif
456 {
457 /*
458 * Check for security violations. If we're returning to
459 * protected mode, the CPU will validate the segment registers
460 * automatically and generate a trap on violations. We handle
461 * the trap, rather than doing all of the checking here.
462 */
463 if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
464 !USERMODE(scp->sc_cs, scp->sc_eflags))
465 return EINVAL;
466
467 tf->tf_gs = scp->sc_gs;
468 tf->tf_fs = scp->sc_fs;
469 tf->tf_es = scp->sc_es;
470 tf->tf_ds = scp->sc_ds;
471 #ifdef VM86
472 if (tf->tf_eflags & PSL_VM)
473 (*p->p_emul->e_syscall_intern)(p);
474 #endif
475 tf->tf_eflags = scp->sc_eflags;
476 }
477 tf->tf_edi = scp->sc_edi;
478 tf->tf_esi = scp->sc_esi;
479 tf->tf_ebp = scp->sc_ebp;
480 tf->tf_ebx = scp->sc_ebx;
481 tf->tf_edx = scp->sc_edx;
482 tf->tf_ecx = scp->sc_ecx;
483 tf->tf_eax = scp->sc_eax;
484 tf->tf_eip = scp->sc_eip;
485 tf->tf_cs = scp->sc_cs;
486 tf->tf_esp = scp->sc_esp_at_signal;
487 tf->tf_ss = scp->sc_ss;
488
489 /* Restore signal stack. */
490 /*
491 * Linux really does it this way; it doesn't have space in sigframe
492 * to save the onstack flag.
493 */
494 mutex_enter(p->p_lock);
495 ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
496 if (ss_gap >= 0 && ss_gap < sas->ss_size)
497 sas->ss_flags |= SS_ONSTACK;
498 else
499 sas->ss_flags &= ~SS_ONSTACK;
500
501 /* Restore signal mask. */
502 linux_old_to_native_sigset(&mask, &scp->sc_mask);
503 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
504 mutex_exit(p->p_lock);
505
506 DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
507 return EJUSTRETURN;
508 }
509
510 #ifdef USER_LDT
511
512 static int
513 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
514 register_t *retval)
515 {
516 struct x86_get_ldt_args gl;
517 int error;
518 union descriptor *ldt_buf;
519 size_t sz;
520
521 /*
522 * I've checked the linux code - this function is asymetric with
523 * linux_write_ldt, and returns raw ldt entries.
524 * NB, the code I saw zerod the spare parts of the user buffer.
525 */
526
527 DPRINTF(("linux_read_ldt!"));
528
529 sz = 8192 * sizeof(*ldt_buf);
530 ldt_buf = kmem_zalloc(sz, KM_SLEEP);
531 gl.start = 0;
532 gl.desc = NULL;
533 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
534 error = x86_get_ldt1(l, &gl, ldt_buf);
535 /* NB gl.num might have changed */
536 if (error == 0) {
537 *retval = gl.num * sizeof *ldt;
538 error = copyout(ldt_buf, SCARG(uap, ptr),
539 gl.num * sizeof *ldt_buf);
540 }
541 kmem_free(ldt_buf, sz);
542
543 return error;
544 }
545
/*
 * Descriptor request as read from user space by linux_write_ldt(),
 * which translates it into a native segment descriptor.
 */
struct linux_ldt_info {
	u_int entry_number;		/* LDT slot to set; must be < 8192 */
	u_long base_addr;		/* segment base address */
	u_int limit;			/* segment limit (low 20 bits used) */
	u_int seg_32bit:1;		/* copied to sd_def32 */
	u_int contents:2;		/* becomes bits 2-3 of sd_type */
	u_int read_exec_only:1;		/* inverted into bit 1 of sd_type */
	u_int limit_in_pages:1;		/* copied to sd_gran */
	u_int seg_not_present:1;	/* inverted into sd_p */
	u_int useable:1;		/* copied to sd_xx unless oldmode */
};
557
558 static int
559 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
560 int oldmode)
561 {
562 struct linux_ldt_info ldt_info;
563 union descriptor d;
564 struct x86_set_ldt_args sl;
565 int error;
566
567 DPRINTF(("linux_write_ldt %d\n", oldmode));
568 if (SCARG(uap, bytecount) != sizeof(ldt_info))
569 return (EINVAL);
570 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
571 return error;
572 if (ldt_info.entry_number >= 8192)
573 return (EINVAL);
574 if (ldt_info.contents == 3) {
575 if (oldmode)
576 return (EINVAL);
577 if (ldt_info.seg_not_present)
578 return (EINVAL);
579 }
580
581 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
582 (oldmode || (ldt_info.contents == 0 &&
583 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
584 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
585 ldt_info.useable == 0))) {
586 /* this means you should zero the ldt */
587 (void)memset(&d, 0, sizeof(d));
588 } else {
589 d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
590 d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
591 d.sd.sd_lolimit = ldt_info.limit & 0xffff;
592 d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
593 d.sd.sd_type = 16 | (ldt_info.contents << 2) |
594 (!ldt_info.read_exec_only << 1);
595 d.sd.sd_dpl = SEL_UPL;
596 d.sd.sd_p = !ldt_info.seg_not_present;
597 d.sd.sd_def32 = ldt_info.seg_32bit;
598 d.sd.sd_gran = ldt_info.limit_in_pages;
599 if (!oldmode)
600 d.sd.sd_xx = ldt_info.useable;
601 else
602 d.sd.sd_xx = 0;
603 }
604 sl.start = ldt_info.entry_number;
605 sl.desc = NULL;
606 sl.num = 1;
607
608 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
609 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
610
611 return x86_set_ldt1(l, &sl, &d);
612 }
613
614 #endif /* USER_LDT */
615
616 int
617 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
618 {
619 /* {
620 syscallarg(int) func;
621 syscallarg(void *) ptr;
622 syscallarg(size_t) bytecount;
623 } */
624
625 switch (SCARG(uap, func)) {
626 #ifdef USER_LDT
627 case 0:
628 return linux_read_ldt(l, (const void *)uap, retval);
629 case 1:
630 return linux_write_ldt(l, (const void *)uap, 1);
631 case 2:
632 #ifdef notyet
633 return linux_read_default_ldt(l, (const void *)uap, retval);
634 #else
635 return (ENOSYS);
636 #endif
637 case 0x11:
638 return linux_write_ldt(l, (const void *)uap, 0);
639 #endif /* USER_LDT */
640
641 default:
642 return (ENOSYS);
643 }
644 }
645
646 /*
647 * XXX Pathetic hack to make svgalib work. This will fake the major
648 * device number of an opened VT so that svgalib likes it. grmbl.
649 * Should probably do it 'wrong the right way' and use a mapping
650 * array for all major device numbers, and map linux_mknod too.
651 */
652 dev_t
653 linux_fakedev(dev_t dev, int raw)
654 {
655 extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
656 const struct cdevsw *cd = cdevsw_lookup(dev);
657
658 if (raw) {
659 #if (NWSDISPLAY > 0)
660 extern const struct cdevsw wsdisplay_cdevsw;
661 if (cd == &wsdisplay_cdevsw)
662 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
663 #endif
664 }
665
666 if (cd == &ptc_cdevsw)
667 return makedev(LINUX_PTC_MAJOR, minor(dev));
668 if (cd == &pts_cdevsw)
669 return makedev(LINUX_PTS_MAJOR, minor(dev));
670
671 return dev;
672 }
673
674 #if (NWSDISPLAY > 0)
675 /*
676 * That's not complete, but enough to get an X server running.
677 */
/*
 * Fixed keyboard translation tables reported to Linux programs by the
 * LINUX_KDGKBENT ioctl.  Each table has NR_KEYS entries and is indexed
 * by the kb_index field of the request; the table itself is chosen via
 * linux_keytabs[] using kb_table.
 * NOTE(review): the 16-bit values look like Linux keymap entries
 * (type in the high byte, value in the low byte) - confirm against the
 * Linux keyboard headers before editing any entry.
 */
#define NR_KEYS 128
static const u_short plain_map[NR_KEYS] = {
	0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036,
	0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009,
	0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
	0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73,
	0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b,
	0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76,
	0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c,
	0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
	0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307,
	0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a,
	0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
	0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
}, shift_map[NR_KEYS] = {
	0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e,
	0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009,
	0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49,
	0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53,
	0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a,
	0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56,
	0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c,
	0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e,
	0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307,
	0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a,
	0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
	0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116,
	0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
}, altgr_map[NR_KEYS] = {
	0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200,
	0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200,
	0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
	0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73,
	0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200,
	0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76,
	0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c,
	0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510,
	0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911,
	0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b,
	0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516,
	0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
	0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
}, ctrl_map[NR_KEYS] = {
	0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e,
	0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200,
	0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009,
	0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013,
	0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200,
	0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016,
	0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c,
	0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
	0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307,
	0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a,
	0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
	0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};
748
/*
 * Key tables selectable through the kb_table field of a LINUX_KDGKBENT
 * request, in index order.  Index 3 deliberately reuses altgr_map.
 * NOTE(review): the index presumably corresponds to the Linux modifier
 * combination (0 plain, 1 shift, 2 altgr, 3 shift+altgr, 4 ctrl) -
 * confirm.
 */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
752 #endif
753
754 static struct biosdisk_info *
755 fd2biosinfo(struct proc *p, struct file *fp)
756 {
757 struct vnode *vp;
758 const char *blkname;
759 char diskname[16];
760 int i;
761 struct nativedisk_info *nip;
762 struct disklist *dl = x86_alldisks;
763
764 if (fp->f_type != DTYPE_VNODE)
765 return NULL;
766 vp = (struct vnode *)fp->f_data;
767
768 if (vp->v_type != VBLK)
769 return NULL;
770
771 blkname = devsw_blk2name(major(vp->v_rdev));
772 snprintf(diskname, sizeof diskname, "%s%llu", blkname,
773 (unsigned long long)DISKUNIT(vp->v_rdev));
774
775 for (i = 0; i < dl->dl_nnativedisks; i++) {
776 nip = &dl->dl_nativedisks[i];
777 if (strcmp(diskname, nip->ni_devname))
778 continue;
779 if (nip->ni_nmatches != 0)
780 return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
781 }
782
783 return NULL;
784 }
785
786
787 /*
788 * We come here in a last attempt to satisfy a Linux ioctl() call
789 */
790 int
791 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
792 {
793 /* {
794 syscallarg(int) fd;
795 syscallarg(u_long) com;
796 syscallarg(void *) data;
797 } */
798 struct sys_ioctl_args bia;
799 u_long com;
800 int error, error1;
801 #if (NWSDISPLAY > 0)
802 struct vt_mode lvt;
803 struct kbentry kbe;
804 #endif
805 struct linux_hd_geometry hdg;
806 struct linux_hd_big_geometry hdg_big;
807 struct biosdisk_info *bip;
808 file_t *fp;
809 int fd;
810 struct disklabel label, *labp;
811 struct partinfo partp;
812 int (*ioctlf)(struct file *, u_long, void *);
813 u_long start, biostotal, realtotal;
814 u_char heads, sectors;
815 u_int cylinders;
816 struct ioctl_pt pt;
817
818 fd = SCARG(uap, fd);
819 SCARG(&bia, fd) = fd;
820 SCARG(&bia, data) = SCARG(uap, data);
821 com = SCARG(uap, com);
822
823 if ((fp = fd_getfile(fd)) == NULL)
824 return (EBADF);
825
826 switch (com) {
827 #if (NWSDISPLAY > 0)
828 case LINUX_KDGKBMODE:
829 com = KDGKBMODE;
830 break;
831 case LINUX_KDSKBMODE:
832 com = KDSKBMODE;
833 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
834 SCARG(&bia, data) = (void *)K_RAW;
835 break;
836 case LINUX_KIOCSOUND:
837 SCARG(&bia, data) =
838 (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
839 /* fall through */
840 case LINUX_KDMKTONE:
841 com = KDMKTONE;
842 break;
843 case LINUX_KDSETMODE:
844 com = KDSETMODE;
845 break;
846 case LINUX_KDGETMODE:
847 /* KD_* values are equal to the wscons numbers */
848 com = WSDISPLAYIO_GMODE;
849 break;
850 case LINUX_KDENABIO:
851 com = KDENABIO;
852 break;
853 case LINUX_KDDISABIO:
854 com = KDDISABIO;
855 break;
856 case LINUX_KDGETLED:
857 com = KDGETLED;
858 break;
859 case LINUX_KDSETLED:
860 com = KDSETLED;
861 break;
862 case LINUX_VT_OPENQRY:
863 com = VT_OPENQRY;
864 break;
865 case LINUX_VT_GETMODE:
866 error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
867 if (error != 0)
868 goto out;
869 lvt.relsig = native_to_linux_signo[lvt.relsig];
870 lvt.acqsig = native_to_linux_signo[lvt.acqsig];
871 lvt.frsig = native_to_linux_signo[lvt.frsig];
872 error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
873 goto out;
874 case LINUX_VT_SETMODE:
875 error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
876 if (error != 0)
877 goto out;
878 lvt.relsig = linux_to_native_signo[lvt.relsig];
879 lvt.acqsig = linux_to_native_signo[lvt.acqsig];
880 lvt.frsig = linux_to_native_signo[lvt.frsig];
881 error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
882 goto out;
883 case LINUX_VT_DISALLOCATE:
884 /* XXX should use WSDISPLAYIO_DELSCREEN */
885 error = 0;
886 goto out;
887 case LINUX_VT_RELDISP:
888 com = VT_RELDISP;
889 break;
890 case LINUX_VT_ACTIVATE:
891 com = VT_ACTIVATE;
892 break;
893 case LINUX_VT_WAITACTIVE:
894 com = VT_WAITACTIVE;
895 break;
896 case LINUX_VT_GETSTATE:
897 com = VT_GETSTATE;
898 break;
899 case LINUX_KDGKBTYPE:
900 {
901 static const u_int8_t kb101 = KB_101;
902
903 /* This is what Linux does. */
904 error = copyout(&kb101, SCARG(uap, data), 1);
905 goto out;
906 }
907 case LINUX_KDGKBENT:
908 /*
909 * The Linux KDGKBENT ioctl is different from the
910 * SYSV original. So we handle it in machdep code.
911 * XXX We should use keyboard mapping information
912 * from wsdisplay, but this would be expensive.
913 */
914 if ((error = copyin(SCARG(uap, data), &kbe,
915 sizeof(struct kbentry))))
916 goto out;
917 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
918 || kbe.kb_index >= NR_KEYS) {
919 error = EINVAL;
920 goto out;
921 }
922 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
923 error = copyout(&kbe, SCARG(uap, data),
924 sizeof(struct kbentry));
925 goto out;
926 #endif
927 case LINUX_HDIO_GETGEO:
928 case LINUX_HDIO_GETGEO_BIG:
929 /*
930 * Try to mimic Linux behaviour: return the BIOS geometry
931 * if possible (extending its # of cylinders if it's beyond
932 * the 1023 limit), fall back to the MI geometry (i.e.
933 * the real geometry) if not found, by returning an
934 * error. See common/linux_hdio.c
935 */
936 bip = fd2biosinfo(curproc, fp);
937 ioctlf = fp->f_ops->fo_ioctl;
938 error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label);
939 error1 = ioctlf(fp, DIOCGPART, (void *)&partp);
940 if (error != 0 && error1 != 0) {
941 error = error1;
942 goto out;
943 }
944 labp = error != 0 ? &label : partp.disklab;
945 start = error1 != 0 ? partp.part->p_offset : 0;
946 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
947 && bip->bi_cyl != 0) {
948 heads = bip->bi_head;
949 sectors = bip->bi_sec;
950 cylinders = bip->bi_cyl;
951 biostotal = heads * sectors * cylinders;
952 realtotal = labp->d_ntracks * labp->d_nsectors *
953 labp->d_ncylinders;
954 if (realtotal > biostotal)
955 cylinders = realtotal / (heads * sectors);
956 } else {
957 heads = labp->d_ntracks;
958 cylinders = labp->d_ncylinders;
959 sectors = labp->d_nsectors;
960 }
961 if (com == LINUX_HDIO_GETGEO) {
962 hdg.start = start;
963 hdg.heads = heads;
964 hdg.cylinders = cylinders;
965 hdg.sectors = sectors;
966 error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
967 goto out;
968 } else {
969 hdg_big.start = start;
970 hdg_big.heads = heads;
971 hdg_big.cylinders = cylinders;
972 hdg_big.sectors = sectors;
973 error = copyout(&hdg_big, SCARG(uap, data),
974 sizeof hdg_big);
975 goto out;
976 }
977
978 default:
979 /*
980 * Unknown to us. If it's on a device, just pass it through
981 * using PTIOCLINUX, the device itself might be able to
982 * make some sense of it.
983 * XXX hack: if the function returns EJUSTRETURN,
984 * it has stuffed a sysctl return value in pt.data.
985 */
986 ioctlf = fp->f_ops->fo_ioctl;
987 pt.com = SCARG(uap, com);
988 pt.data = SCARG(uap, data);
989 error = ioctlf(fp, PTIOCLINUX, &pt);
990 if (error == EJUSTRETURN) {
991 retval[0] = (register_t)pt.data;
992 error = 0;
993 }
994
995 if (error == ENOTTY) {
996 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
997 com));
998 }
999 goto out;
1000 }
1001 SCARG(&bia, com) = com;
1002 error = sys_ioctl(curlwp, &bia, retval);
1003 out:
1004 fd_putfile(fd);
1005 return error;
1006 }
1007
1008 /*
1009 * Set I/O permissions for a process. Just set the maximum level
1010 * right away (ignoring the argument), otherwise we would have
1011 * to rely on I/O permission maps, which are not implemented.
1012 */
1013 int
1014 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
1015 {
1016 /* {
1017 syscallarg(int) level;
1018 } */
1019 struct trapframe *fp = l->l_md.md_regs;
1020
1021 if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
1022 NULL, NULL, NULL, NULL) != 0)
1023 return EPERM;
1024 fp->tf_eflags |= PSL_IOPL;
1025 *retval = 0;
1026 return 0;
1027 }
1028
1029 /*
1030 * See above. If a root process tries to set access to an I/O port,
1031 * just let it have the whole range.
1032 */
1033 int
1034 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
1035 {
1036 /* {
1037 syscallarg(unsigned int) lo;
1038 syscallarg(unsigned int) hi;
1039 syscallarg(int) val;
1040 } */
1041 struct trapframe *fp = l->l_md.md_regs;
1042
1043 if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
1044 KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
1045 NULL, NULL) != 0)
1046 return EPERM;
1047 if (SCARG(uap, val))
1048 fp->tf_eflags |= PSL_IOPL;
1049 *retval = 0;
1050 return 0;
1051 }
1052
1053 int
1054 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
1055 void *arg)
1056 {
1057 return 0;
1058 }
1059
1060 const char *
1061 linux_get_uname_arch(void)
1062 {
1063 static char uname_arch[5] = "i386";
1064
1065 if (uname_arch[1] == '3')
1066 uname_arch[1] += cpu_class;
1067 return uname_arch;
1068 }
1069