linux_machdep.c revision 1.164 1 /* $NetBSD: linux_machdep.c,v 1.164 2017/08/12 07:07:53 maxv Exp $ */
2
3 /*-
4 * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Frank van der Linden, and by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.164 2017/08/12 07:07:53 maxv Exp $");
34
35 #if defined(_KERNEL_OPT)
36 #include "opt_user_ldt.h"
37 #endif
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/signalvar.h>
42 #include <sys/kernel.h>
43 #include <sys/proc.h>
44 #include <sys/buf.h>
45 #include <sys/reboot.h>
46 #include <sys/conf.h>
47 #include <sys/exec.h>
48 #include <sys/file.h>
49 #include <sys/callout.h>
50 #include <sys/mbuf.h>
51 #include <sys/msgbuf.h>
52 #include <sys/mount.h>
53 #include <sys/vnode.h>
54 #include <sys/device.h>
55 #include <sys/syscallargs.h>
56 #include <sys/filedesc.h>
57 #include <sys/exec_elf.h>
58 #include <sys/disklabel.h>
59 #include <sys/ioctl.h>
60 #include <sys/wait.h>
61 #include <sys/kauth.h>
62 #include <sys/kmem.h>
63
64 #include <miscfs/specfs/specdev.h>
65
66 #include <compat/linux/common/linux_types.h>
67 #include <compat/linux/common/linux_signal.h>
68 #include <compat/linux/common/linux_util.h>
69 #include <compat/linux/common/linux_ioctl.h>
70 #include <compat/linux/common/linux_hdio.h>
71 #include <compat/linux/common/linux_exec.h>
72 #include <compat/linux/common/linux_machdep.h>
73 #include <compat/linux/common/linux_errno.h>
74
75 #include <compat/linux/linux_syscallargs.h>
76
77 #include <sys/cpu.h>
78 #include <machine/cpufunc.h>
79 #include <machine/psl.h>
80 #include <machine/reg.h>
81 #include <machine/segments.h>
82 #include <machine/specialreg.h>
83 #include <machine/sysarch.h>
84 #include <machine/vmparam.h>
85
86 #include <x86/fpu.h>
87
88 /*
89 * To see whether wscons is configured (for virtual console ioctl calls).
90 */
91 #if defined(_KERNEL_OPT)
92 #include "wsdisplay.h"
93 #endif
94 #if (NWSDISPLAY > 0)
95 #include <dev/wscons/wsconsio.h>
96 #include <dev/wscons/wsdisplay_usl_io.h>
97 #if defined(_KERNEL_OPT)
98 #include "opt_xserver.h"
99 #endif
100 #endif
101
102 #ifdef DEBUG_LINUX
103 #define DPRINTF(a) uprintf a
104 #else
105 #define DPRINTF(a)
106 #endif
107
108 extern struct disklist *x86_alldisks;
109
110 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
111 static void linux_save_ucontext(struct lwp *, struct trapframe *,
112 const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
113 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
114 const sigset_t *, struct linux_sigcontext *);
115 static int linux_restore_sigcontext(struct lwp *,
116 struct linux_sigcontext *, register_t *);
117 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
118 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
119
120 extern char linux_sigcode[], linux_rt_sigcode[];
121
122 /*
123 * Deal with some i386-specific things in the Linux emulation code.
124 */
125
126 void
127 linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack)
128 {
129 struct trapframe *tf;
130
131 #ifdef USER_LDT
132 pmap_ldt_cleanup(l);
133 #endif
134
135 fpu_save_area_clear(l, __Linux_NPXCW__);
136
137 tf = l->l_md.md_regs;
138 tf->tf_gs = 0;
139 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
140 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
141 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
142 tf->tf_edi = 0;
143 tf->tf_esi = 0;
144 tf->tf_ebp = 0;
145 tf->tf_ebx = l->l_proc->p_psstrp;
146 tf->tf_edx = 0;
147 tf->tf_ecx = 0;
148 tf->tf_eax = 0;
149 tf->tf_eip = epp->ep_entry;
150 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
151 tf->tf_eflags = PSL_USERSET;
152 tf->tf_esp = stack;
153 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
154 }
155
156 /*
157 * Send an interrupt to process.
158 *
159 * Stack is set up to allow sigcode stored
160 * in u. to call routine, followed by kcall
161 * to sigreturn routine below. After sigreturn
162 * resets the signal mask, the stack, and the
163 * frame pointer, it returns to the user
164 * specified pc, psl.
165 */
166
167 void
168 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
169 {
170 if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
171 linux_rt_sendsig(ksi, mask);
172 else
173 linux_old_sendsig(ksi, mask);
174 }
175
176
177 static void
178 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
179 {
180 uc->uc_flags = 0;
181 uc->uc_link = NULL;
182 native_to_linux_sigaltstack(&uc->uc_stack, sas);
183 linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
184 native_to_linux_sigset(&uc->uc_sigmask, mask);
185 (void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
186 }
187
188 static void
189 linux_save_sigcontext(struct lwp *l, struct trapframe *tf,
190 const sigset_t *mask, struct linux_sigcontext *sc)
191 {
192 struct pcb *pcb = lwp_getpcb(l);
193
194 /* Save register context. */
195 sc->sc_gs = tf->tf_gs;
196 sc->sc_fs = tf->tf_fs;
197 sc->sc_es = tf->tf_es;
198 sc->sc_ds = tf->tf_ds;
199 sc->sc_eflags = tf->tf_eflags;
200
201 sc->sc_edi = tf->tf_edi;
202 sc->sc_esi = tf->tf_esi;
203 sc->sc_esp = tf->tf_esp;
204 sc->sc_ebp = tf->tf_ebp;
205 sc->sc_ebx = tf->tf_ebx;
206 sc->sc_edx = tf->tf_edx;
207 sc->sc_ecx = tf->tf_ecx;
208 sc->sc_eax = tf->tf_eax;
209 sc->sc_eip = tf->tf_eip;
210 sc->sc_cs = tf->tf_cs;
211 sc->sc_esp_at_signal = tf->tf_esp;
212 sc->sc_ss = tf->tf_ss;
213 sc->sc_err = tf->tf_err;
214 sc->sc_trapno = tf->tf_trapno;
215 sc->sc_cr2 = pcb->pcb_cr2;
216 sc->sc_387 = NULL;
217
218 /* Save signal stack. */
219 /* Linux doesn't save the onstack flag in sigframe */
220
221 /* Save signal mask. */
222 native_to_linux_old_sigset(&sc->sc_mask, mask);
223 }
224
225 static void
226 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
227 {
228 struct lwp *l = curlwp;
229 struct proc *p = l->l_proc;
230 struct trapframe *tf;
231 struct linux_rt_sigframe *fp, frame;
232 int onstack, error;
233 int sig = ksi->ksi_signo;
234 sig_t catcher = SIGACTION(p, sig).sa_handler;
235 struct sigaltstack *sas = &l->l_sigstk;
236
237 tf = l->l_md.md_regs;
238 /* Do we need to jump onto the signal stack? */
239 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
240 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
241
242
243 /* Allocate space for the signal handler context. */
244 if (onstack)
245 fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
246 sas->ss_size);
247 else
248 fp = (struct linux_rt_sigframe *)tf->tf_esp;
249 fp--;
250
251 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
252 onstack, fp, sig, tf->tf_eip,
253 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
254
255 /* Build stack frame for signal trampoline. */
256 frame.sf_handler = catcher;
257 frame.sf_sig = native_to_linux_signo[sig];
258 frame.sf_sip = &fp->sf_si;
259 frame.sf_ucp = &fp->sf_uc;
260
261 /*
262 * XXX: the following code assumes that the constants for
263 * siginfo are the same between linux and NetBSD.
264 */
265 native_to_linux_siginfo(&frame.sf_si, &ksi->ksi_info);
266
267 /* Save register context. */
268 linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
269 sendsig_reset(l, sig);
270
271 mutex_exit(p->p_lock);
272 error = copyout(&frame, fp, sizeof(frame));
273 mutex_enter(p->p_lock);
274
275 if (error != 0) {
276 /*
277 * Process has trashed its stack; give it an illegal
278 * instruction to halt it in its tracks.
279 */
280 sigexit(l, SIGILL);
281 /* NOTREACHED */
282 }
283
284 /*
285 * Build context to run handler in.
286 */
287 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
288 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
289 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
290 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
291 (linux_rt_sigcode - linux_sigcode);
292 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
293 tf->tf_eflags &= ~PSL_CLEARSIG;
294 tf->tf_esp = (int)fp;
295 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
296
297 /* Remember that we're now on the signal stack. */
298 if (onstack)
299 sas->ss_flags |= SS_ONSTACK;
300 }
301
302 static void
303 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
304 {
305 struct lwp *l = curlwp;
306 struct proc *p = l->l_proc;
307 struct trapframe *tf;
308 struct linux_sigframe *fp, frame;
309 int onstack, error;
310 int sig = ksi->ksi_signo;
311 sig_t catcher = SIGACTION(p, sig).sa_handler;
312 struct sigaltstack *sas = &l->l_sigstk;
313
314 tf = l->l_md.md_regs;
315
316 /* Do we need to jump onto the signal stack? */
317 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
318 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
319
320 /* Allocate space for the signal handler context. */
321 if (onstack)
322 fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
323 sas->ss_size);
324 else
325 fp = (struct linux_sigframe *)tf->tf_esp;
326 fp--;
327
328 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
329 onstack, fp, sig, tf->tf_eip,
330 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
331
332 /* Build stack frame for signal trampoline. */
333 frame.sf_handler = catcher;
334 frame.sf_sig = native_to_linux_signo[sig];
335
336 linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
337 sendsig_reset(l, sig);
338
339 mutex_exit(p->p_lock);
340 error = copyout(&frame, fp, sizeof(frame));
341 mutex_enter(p->p_lock);
342
343 if (error != 0) {
344 /*
345 * Process has trashed its stack; give it an illegal
346 * instruction to halt it in its tracks.
347 */
348 sigexit(l, SIGILL);
349 /* NOTREACHED */
350 }
351
352 /*
353 * Build context to run handler in.
354 */
355 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
356 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
357 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
358 tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
359 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
360 tf->tf_eflags &= ~PSL_CLEARSIG;
361 tf->tf_esp = (int)fp;
362 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
363
364 /* Remember that we're now on the signal stack. */
365 if (onstack)
366 sas->ss_flags |= SS_ONSTACK;
367 }
368
369 /*
370 * System call to cleanup state after a signal
371 * has been taken. Reset signal mask and
372 * stack state from context left by sendsig (above).
373 * Return to previous pc and psl as specified by
374 * context left by sendsig. Check carefully to
375 * make sure that the user has not modified the
376 * psl to gain improper privileges or to cause
377 * a machine fault.
378 */
379 int
380 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
381 {
382 /* {
383 syscallarg(struct linux_ucontext *) ucp;
384 } */
385 struct linux_ucontext context, *ucp = SCARG(uap, ucp);
386 int error;
387
388 /*
389 * The trampoline code hands us the context.
390 * It is unsafe to keep track of it ourselves, in the event that a
391 * program jumps out of a signal handler.
392 */
393 if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
394 return error;
395
396 /* XXX XAX we can do better here by using more of the ucontext */
397 return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
398 }
399
400 int
401 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
402 {
403 /* {
404 syscallarg(struct linux_sigcontext *) scp;
405 } */
406 struct linux_sigcontext context, *scp = SCARG(uap, scp);
407 int error;
408
409 /*
410 * The trampoline code hands us the context.
411 * It is unsafe to keep track of it ourselves, in the event that a
412 * program jumps out of a signal handler.
413 */
414 if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
415 return error;
416 return linux_restore_sigcontext(l, &context, retval);
417 }
418
419 static int
420 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
421 register_t *retval)
422 {
423 struct proc *p = l->l_proc;
424 struct sigaltstack *sas = &l->l_sigstk;
425 struct trapframe *tf;
426 sigset_t mask;
427 ssize_t ss_gap;
428
429 /* Restore register context. */
430 tf = l->l_md.md_regs;
431 DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
432
433 /*
434 * Check for security violations. If we're returning to
435 * protected mode, the CPU will validate the segment registers
436 * automatically and generate a trap on violations. We handle
437 * the trap, rather than doing all of the checking here.
438 */
439 if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
440 !USERMODE(scp->sc_cs, scp->sc_eflags))
441 return EINVAL;
442
443 tf->tf_gs = scp->sc_gs;
444 tf->tf_fs = scp->sc_fs;
445 tf->tf_es = scp->sc_es;
446 tf->tf_ds = scp->sc_ds;
447 tf->tf_eflags = scp->sc_eflags;
448
449 tf->tf_edi = scp->sc_edi;
450 tf->tf_esi = scp->sc_esi;
451 tf->tf_ebp = scp->sc_ebp;
452 tf->tf_ebx = scp->sc_ebx;
453 tf->tf_edx = scp->sc_edx;
454 tf->tf_ecx = scp->sc_ecx;
455 tf->tf_eax = scp->sc_eax;
456 tf->tf_eip = scp->sc_eip;
457 tf->tf_cs = scp->sc_cs;
458 tf->tf_esp = scp->sc_esp_at_signal;
459 tf->tf_ss = scp->sc_ss;
460
461 /* Restore signal stack. */
462 /*
463 * Linux really does it this way; it doesn't have space in sigframe
464 * to save the onstack flag.
465 */
466 mutex_enter(p->p_lock);
467 ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
468 if (ss_gap >= 0 && ss_gap < sas->ss_size)
469 sas->ss_flags |= SS_ONSTACK;
470 else
471 sas->ss_flags &= ~SS_ONSTACK;
472
473 /* Restore signal mask. */
474 linux_old_to_native_sigset(&mask, &scp->sc_mask);
475 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
476 mutex_exit(p->p_lock);
477
478 DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
479 return EJUSTRETURN;
480 }
481
482 #ifdef USER_LDT
483
484 static int
485 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
486 register_t *retval)
487 {
488 struct x86_get_ldt_args gl;
489 int error;
490 union descriptor *ldt_buf;
491 size_t sz;
492
493 /*
494 * I've checked the linux code - this function is asymetric with
495 * linux_write_ldt, and returns raw ldt entries.
496 * NB, the code I saw zerod the spare parts of the user buffer.
497 */
498
499 DPRINTF(("linux_read_ldt!"));
500
501 sz = 8192 * sizeof(*ldt_buf);
502 ldt_buf = kmem_zalloc(sz, KM_SLEEP);
503 gl.start = 0;
504 gl.desc = NULL;
505 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
506 error = x86_get_ldt1(l, &gl, ldt_buf);
507 /* NB gl.num might have changed */
508 if (error == 0) {
509 *retval = gl.num * sizeof(*ldtstore);
510 error = copyout(ldt_buf, SCARG(uap, ptr),
511 gl.num * sizeof *ldt_buf);
512 }
513 kmem_free(ldt_buf, sz);
514
515 return error;
516 }
517
518 struct linux_ldt_info {
519 u_int entry_number;
520 u_long base_addr;
521 u_int limit;
522 u_int seg_32bit:1;
523 u_int contents:2;
524 u_int read_exec_only:1;
525 u_int limit_in_pages:1;
526 u_int seg_not_present:1;
527 u_int useable:1;
528 };
529
530 static int
531 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
532 int oldmode)
533 {
534 struct linux_ldt_info ldt_info;
535 union descriptor d;
536 struct x86_set_ldt_args sl;
537 int error;
538
539 DPRINTF(("linux_write_ldt %d\n", oldmode));
540 if (SCARG(uap, bytecount) != sizeof(ldt_info))
541 return (EINVAL);
542 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
543 return error;
544 if (ldt_info.entry_number >= 8192)
545 return (EINVAL);
546 if (ldt_info.contents == 3) {
547 if (oldmode)
548 return (EINVAL);
549 if (ldt_info.seg_not_present)
550 return (EINVAL);
551 }
552
553 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
554 (oldmode || (ldt_info.contents == 0 &&
555 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
556 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
557 ldt_info.useable == 0))) {
558 /* this means you should zero the ldt */
559 (void)memset(&d, 0, sizeof(d));
560 } else {
561 d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
562 d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
563 d.sd.sd_lolimit = ldt_info.limit & 0xffff;
564 d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
565 d.sd.sd_type = 16 | (ldt_info.contents << 2) |
566 (!ldt_info.read_exec_only << 1);
567 d.sd.sd_dpl = SEL_UPL;
568 d.sd.sd_p = !ldt_info.seg_not_present;
569 d.sd.sd_def32 = ldt_info.seg_32bit;
570 d.sd.sd_gran = ldt_info.limit_in_pages;
571 if (!oldmode)
572 d.sd.sd_xx = ldt_info.useable;
573 else
574 d.sd.sd_xx = 0;
575 }
576 sl.start = ldt_info.entry_number;
577 sl.desc = NULL;
578 sl.num = 1;
579
580 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
581 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
582
583 return x86_set_ldt1(l, &sl, &d);
584 }
585
586 #endif /* USER_LDT */
587
588 int
589 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
590 {
591 /* {
592 syscallarg(int) func;
593 syscallarg(void *) ptr;
594 syscallarg(size_t) bytecount;
595 } */
596
597 switch (SCARG(uap, func)) {
598 #ifdef USER_LDT
599 case 0:
600 return linux_read_ldt(l, (const void *)uap, retval);
601 case 1:
602 return linux_write_ldt(l, (const void *)uap, 1);
603 case 2:
604 #ifdef notyet
605 return linux_read_default_ldt(l, (const void *)uap, retval);
606 #else
607 return (ENOSYS);
608 #endif
609 case 0x11:
610 return linux_write_ldt(l, (const void *)uap, 0);
611 #endif /* USER_LDT */
612
613 default:
614 return (ENOSYS);
615 }
616 }
617
618 /*
619 * XXX Pathetic hack to make svgalib work. This will fake the major
620 * device number of an opened VT so that svgalib likes it. grmbl.
621 * Should probably do it 'wrong the right way' and use a mapping
622 * array for all major device numbers, and map linux_mknod too.
623 */
624 dev_t
625 linux_fakedev(dev_t dev, int raw)
626 {
627 extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
628 const struct cdevsw *cd = cdevsw_lookup(dev);
629
630 if (raw) {
631 #if (NWSDISPLAY > 0)
632 extern const struct cdevsw wsdisplay_cdevsw;
633 if (cd == &wsdisplay_cdevsw)
634 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
635 #endif
636 }
637
638 if (cd == &ptc_cdevsw)
639 return makedev(LINUX_PTC_MAJOR, minor(dev));
640 if (cd == &pts_cdevsw)
641 return makedev(LINUX_PTS_MAJOR, minor(dev));
642
643 return dev;
644 }
645
646 #if (NWSDISPLAY > 0)
647 /*
648 * That's not complete, but enough to get an X server running.
649 */
650 #define NR_KEYS 128
651 static const u_short plain_map[NR_KEYS] = {
652 0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036,
653 0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009,
654 0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
655 0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73,
656 0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b,
657 0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76,
658 0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c,
659 0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
660 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307,
661 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
662 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a,
663 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
664 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
665 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
666 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
667 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
668 }, shift_map[NR_KEYS] = {
669 0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e,
670 0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009,
671 0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49,
672 0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53,
673 0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a,
674 0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56,
675 0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c,
676 0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e,
677 0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307,
678 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
679 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a,
680 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
681 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
682 0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116,
683 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
684 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
685 }, altgr_map[NR_KEYS] = {
686 0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200,
687 0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200,
688 0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
689 0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73,
690 0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200,
691 0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76,
692 0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c,
693 0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510,
694 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911,
695 0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b,
696 0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516,
697 0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
698 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
699 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
700 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
701 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
702 }, ctrl_map[NR_KEYS] = {
703 0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e,
704 0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200,
705 0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009,
706 0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013,
707 0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200,
708 0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016,
709 0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c,
710 0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
711 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307,
712 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
713 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a,
714 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
715 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
716 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
717 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
718 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
719 };
720
721 const u_short * const linux_keytabs[] = {
722 plain_map, shift_map, altgr_map, altgr_map, ctrl_map
723 };
724 #endif
725
726 static struct biosdisk_info *
727 fd2biosinfo(struct proc *p, struct file *fp)
728 {
729 struct vnode *vp;
730 const char *blkname;
731 char diskname[16];
732 int i;
733 struct nativedisk_info *nip;
734 struct disklist *dl = x86_alldisks;
735
736 if (dl == NULL)
737 return NULL;
738 if (fp->f_type != DTYPE_VNODE)
739 return NULL;
740 vp = (struct vnode *)fp->f_data;
741
742 if (vp->v_type != VBLK)
743 return NULL;
744
745 blkname = devsw_blk2name(major(vp->v_rdev));
746 snprintf(diskname, sizeof diskname, "%s%llu", blkname,
747 (unsigned long long)DISKUNIT(vp->v_rdev));
748
749 for (i = 0; i < dl->dl_nnativedisks; i++) {
750 nip = &dl->dl_nativedisks[i];
751 if (strcmp(diskname, nip->ni_devname))
752 continue;
753 if (nip->ni_nmatches != 0)
754 return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
755 }
756
757 return NULL;
758 }
759
760
761 /*
762 * We come here in a last attempt to satisfy a Linux ioctl() call
763 */
764 int
765 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
766 {
767 /* {
768 syscallarg(int) fd;
769 syscallarg(u_long) com;
770 syscallarg(void *) data;
771 } */
772 struct sys_ioctl_args bia;
773 u_long com;
774 int error, error1;
775 #if (NWSDISPLAY > 0)
776 struct vt_mode lvt;
777 struct kbentry kbe;
778 #endif
779 struct linux_hd_geometry hdg;
780 struct linux_hd_big_geometry hdg_big;
781 struct biosdisk_info *bip;
782 file_t *fp;
783 int fd;
784 struct disklabel label;
785 struct partinfo partp;
786 int (*ioctlf)(struct file *, u_long, void *);
787 u_long start, biostotal, realtotal;
788 u_char heads, sectors;
789 u_int cylinders;
790 struct ioctl_pt pt;
791
792 fd = SCARG(uap, fd);
793 SCARG(&bia, fd) = fd;
794 SCARG(&bia, data) = SCARG(uap, data);
795 com = SCARG(uap, com);
796
797 if ((fp = fd_getfile(fd)) == NULL)
798 return (EBADF);
799
800 switch (com) {
801 #if (NWSDISPLAY > 0)
802 case LINUX_KDGKBMODE:
803 com = KDGKBMODE;
804 break;
805 case LINUX_KDSKBMODE:
806 com = KDSKBMODE;
807 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
808 SCARG(&bia, data) = (void *)K_RAW;
809 break;
810 case LINUX_KIOCSOUND:
811 SCARG(&bia, data) =
812 (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
813 /* fall through */
814 case LINUX_KDMKTONE:
815 com = KDMKTONE;
816 break;
817 case LINUX_KDSETMODE:
818 com = KDSETMODE;
819 break;
820 case LINUX_KDGETMODE:
821 /* KD_* values are equal to the wscons numbers */
822 com = WSDISPLAYIO_GMODE;
823 break;
824 case LINUX_KDENABIO:
825 com = KDENABIO;
826 break;
827 case LINUX_KDDISABIO:
828 com = KDDISABIO;
829 break;
830 case LINUX_KDGETLED:
831 com = KDGETLED;
832 break;
833 case LINUX_KDSETLED:
834 com = KDSETLED;
835 break;
836 case LINUX_VT_OPENQRY:
837 com = VT_OPENQRY;
838 break;
839 case LINUX_VT_GETMODE:
840 error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
841 if (error != 0)
842 goto out;
843 lvt.relsig = native_to_linux_signo[lvt.relsig];
844 lvt.acqsig = native_to_linux_signo[lvt.acqsig];
845 lvt.frsig = native_to_linux_signo[lvt.frsig];
846 error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
847 goto out;
848 case LINUX_VT_SETMODE:
849 error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
850 if (error != 0)
851 goto out;
852 lvt.relsig = linux_to_native_signo[lvt.relsig];
853 lvt.acqsig = linux_to_native_signo[lvt.acqsig];
854 lvt.frsig = linux_to_native_signo[lvt.frsig];
855 error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
856 goto out;
857 case LINUX_VT_DISALLOCATE:
858 /* XXX should use WSDISPLAYIO_DELSCREEN */
859 error = 0;
860 goto out;
861 case LINUX_VT_RELDISP:
862 com = VT_RELDISP;
863 break;
864 case LINUX_VT_ACTIVATE:
865 com = VT_ACTIVATE;
866 break;
867 case LINUX_VT_WAITACTIVE:
868 com = VT_WAITACTIVE;
869 break;
870 case LINUX_VT_GETSTATE:
871 com = VT_GETSTATE;
872 break;
873 case LINUX_KDGKBTYPE:
874 {
875 static const u_int8_t kb101 = KB_101;
876
877 /* This is what Linux does. */
878 error = copyout(&kb101, SCARG(uap, data), 1);
879 goto out;
880 }
881 case LINUX_KDGKBENT:
882 /*
883 * The Linux KDGKBENT ioctl is different from the
884 * SYSV original. So we handle it in machdep code.
885 * XXX We should use keyboard mapping information
886 * from wsdisplay, but this would be expensive.
887 */
888 if ((error = copyin(SCARG(uap, data), &kbe,
889 sizeof(struct kbentry))))
890 goto out;
891 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
892 || kbe.kb_index >= NR_KEYS) {
893 error = EINVAL;
894 goto out;
895 }
896 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
897 error = copyout(&kbe, SCARG(uap, data),
898 sizeof(struct kbentry));
899 goto out;
900 #endif
901 case LINUX_HDIO_GETGEO:
902 case LINUX_HDIO_GETGEO_BIG:
903 /*
904 * Try to mimic Linux behaviour: return the BIOS geometry
905 * if possible (extending its # of cylinders if it's beyond
906 * the 1023 limit), fall back to the MI geometry (i.e.
907 * the real geometry) if not found, by returning an
908 * error. See common/linux_hdio.c
909 */
910 bip = fd2biosinfo(curproc, fp);
911 ioctlf = fp->f_ops->fo_ioctl;
912 error = ioctlf(fp, DIOCGDINFO, (void *)&label);
913 error1 = ioctlf(fp, DIOCGPARTINFO, (void *)&partp);
914 if (error != 0 && error1 != 0) {
915 error = error1;
916 goto out;
917 }
918 start = error1 != 0 ? partp.pi_offset : 0;
919 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
920 && bip->bi_cyl != 0) {
921 heads = bip->bi_head;
922 sectors = bip->bi_sec;
923 cylinders = bip->bi_cyl;
924 biostotal = heads * sectors * cylinders;
925 realtotal = label.d_ntracks * label.d_nsectors *
926 label.d_ncylinders;
927 if (realtotal > biostotal)
928 cylinders = realtotal / (heads * sectors);
929 } else {
930 heads = label.d_ntracks;
931 cylinders = label.d_ncylinders;
932 sectors = label.d_nsectors;
933 }
934 if (com == LINUX_HDIO_GETGEO) {
935 hdg.start = start;
936 hdg.heads = heads;
937 hdg.cylinders = cylinders;
938 hdg.sectors = sectors;
939 error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
940 goto out;
941 } else {
942 hdg_big.start = start;
943 hdg_big.heads = heads;
944 hdg_big.cylinders = cylinders;
945 hdg_big.sectors = sectors;
946 error = copyout(&hdg_big, SCARG(uap, data),
947 sizeof hdg_big);
948 goto out;
949 }
950
951 default:
952 /*
953 * Unknown to us. If it's on a device, just pass it through
954 * using PTIOCLINUX, the device itself might be able to
955 * make some sense of it.
956 * XXX hack: if the function returns EJUSTRETURN,
957 * it has stuffed a sysctl return value in pt.data.
958 */
959 ioctlf = fp->f_ops->fo_ioctl;
960 pt.com = SCARG(uap, com);
961 pt.data = SCARG(uap, data);
962 error = ioctlf(fp, PTIOCLINUX, &pt);
963 if (error == EJUSTRETURN) {
964 retval[0] = (register_t)pt.data;
965 error = 0;
966 }
967
968 if (error == ENOTTY) {
969 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
970 com));
971 }
972 goto out;
973 }
974 SCARG(&bia, com) = com;
975 error = sys_ioctl(curlwp, &bia, retval);
976 out:
977 fd_putfile(fd);
978 return error;
979 }
980
981 /*
982 * Set I/O permissions for a process. Just set the maximum level
983 * right away (ignoring the argument), otherwise we would have
984 * to rely on I/O permission maps, which are not implemented.
985 */
986 int
987 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
988 {
989 /* {
990 syscallarg(int) level;
991 } */
992 struct trapframe *fp = l->l_md.md_regs;
993
994 if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
995 NULL, NULL, NULL, NULL) != 0)
996 return EPERM;
997 fp->tf_eflags |= PSL_IOPL;
998 *retval = 0;
999 return 0;
1000 }
1001
1002 /*
1003 * See above. If a root process tries to set access to an I/O port,
1004 * just let it have the whole range.
1005 */
1006 int
1007 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
1008 {
1009 /* {
1010 syscallarg(unsigned int) lo;
1011 syscallarg(unsigned int) hi;
1012 syscallarg(int) val;
1013 } */
1014 struct trapframe *fp = l->l_md.md_regs;
1015
1016 if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
1017 KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
1018 NULL, NULL) != 0)
1019 return EPERM;
1020 if (SCARG(uap, val))
1021 fp->tf_eflags |= PSL_IOPL;
1022 *retval = 0;
1023 return 0;
1024 }
1025
1026 int
1027 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
1028 void *arg)
1029 {
1030 return 0;
1031 }
1032
1033 const char *
1034 linux_get_uname_arch(void)
1035 {
1036 static char uname_arch[5] = "i386";
1037
1038 if (uname_arch[1] == '3')
1039 uname_arch[1] += cpu_class;
1040 return uname_arch;
1041 }
1042