linux_machdep.c revision 1.168 1 /* $NetBSD: linux_machdep.c,v 1.168 2021/09/07 11:43:04 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Frank van der Linden, and by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.168 2021/09/07 11:43:04 riastradh Exp $");
34
35 #if defined(_KERNEL_OPT)
36 #include "opt_user_ldt.h"
37 #endif
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/signalvar.h>
42 #include <sys/kernel.h>
43 #include <sys/proc.h>
44 #include <sys/buf.h>
45 #include <sys/reboot.h>
46 #include <sys/conf.h>
47 #include <sys/exec.h>
48 #include <sys/file.h>
49 #include <sys/callout.h>
50 #include <sys/mbuf.h>
51 #include <sys/msgbuf.h>
52 #include <sys/mount.h>
53 #include <sys/vnode.h>
54 #include <sys/device.h>
55 #include <sys/syscallargs.h>
56 #include <sys/filedesc.h>
57 #include <sys/exec_elf.h>
58 #include <sys/disklabel.h>
59 #include <sys/ioctl.h>
60 #include <sys/wait.h>
61 #include <sys/kauth.h>
62 #include <sys/kmem.h>
63
64 #include <miscfs/specfs/specdev.h>
65
66 #include <compat/linux/common/linux_types.h>
67 #include <compat/linux/common/linux_signal.h>
68 #include <compat/linux/common/linux_util.h>
69 #include <compat/linux/common/linux_ioctl.h>
70 #include <compat/linux/common/linux_hdio.h>
71 #include <compat/linux/common/linux_exec.h>
72 #include <compat/linux/common/linux_machdep.h>
73 #include <compat/linux/common/linux_errno.h>
74
75 #include <compat/linux/linux_syscallargs.h>
76
77 #include <sys/cpu.h>
78 #include <machine/cpufunc.h>
79 #include <machine/psl.h>
80 #include <machine/reg.h>
81 #include <machine/segments.h>
82 #include <machine/specialreg.h>
83 #include <machine/sysarch.h>
84 #include <machine/vmparam.h>
85
86 #include <x86/fpu.h>
87
88 /*
89 * To see whether wscons is configured (for virtual console ioctl calls).
90 */
91 #if defined(_KERNEL_OPT)
92 #include "wsdisplay.h"
93 #endif
94 #if (NWSDISPLAY > 0)
95 #include <dev/wscons/wsconsio.h>
96 #include <dev/wscons/wsdisplay_usl_io.h>
97 #if defined(_KERNEL_OPT)
98 #include "opt_xserver.h"
99 #endif
100 #endif
101
102 #ifdef DEBUG_LINUX
103 #define DPRINTF(a) uprintf a
104 #else
105 #define DPRINTF(a)
106 #endif
107
108 extern struct disklist *x86_alldisks;
109
110 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
111 static void linux_save_ucontext(struct lwp *, struct trapframe *,
112 const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
113 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
114 const sigset_t *, struct linux_sigcontext *);
115 static int linux_restore_sigcontext(struct lwp *,
116 struct linux_sigcontext *, register_t *);
117 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
118 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
119
120 extern char linux_sigcode[], linux_rt_sigcode[];
121
122 /*
123 * Deal with some i386-specific things in the Linux emulation code.
124 */
125
126 void
127 linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack)
128 {
129 struct trapframe *tf;
130
131 #ifdef USER_LDT
132 pmap_ldt_cleanup(l);
133 #endif
134
135 fpu_clear(l, __Linux_NPXCW__);
136
137 tf = l->l_md.md_regs;
138 tf->tf_gs = 0;
139 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
140 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
141 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
142 tf->tf_edi = 0;
143 tf->tf_esi = 0;
144 tf->tf_ebp = 0;
145 tf->tf_ebx = l->l_proc->p_psstrp;
146 tf->tf_edx = 0;
147 tf->tf_ecx = 0;
148 tf->tf_eax = 0;
149 tf->tf_eip = epp->ep_entry;
150 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
151 tf->tf_eflags = PSL_USERSET;
152 tf->tf_esp = stack;
153 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
154 }
155
156 /*
157 * Send an interrupt to process.
158 *
159 * Stack is set up to allow sigcode stored
160 * in u. to call routine, followed by kcall
161 * to sigreturn routine below. After sigreturn
162 * resets the signal mask, the stack, and the
163 * frame pointer, it returns to the user
164 * specified pc, psl.
165 */
166
167 void
168 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
169 {
170 if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
171 linux_rt_sendsig(ksi, mask);
172 else
173 linux_old_sendsig(ksi, mask);
174 }
175
176
177 static void
178 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
179 {
180 uc->uc_flags = 0;
181 uc->uc_link = NULL;
182 native_to_linux_sigaltstack(&uc->uc_stack, sas);
183 linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
184 native_to_linux_sigset(&uc->uc_sigmask, mask);
185 (void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
186 }
187
188 static void
189 linux_save_sigcontext(struct lwp *l, struct trapframe *tf,
190 const sigset_t *mask, struct linux_sigcontext *sc)
191 {
192 struct pcb *pcb = lwp_getpcb(l);
193
194 /* Save register context. */
195 sc->sc_gs = tf->tf_gs;
196 sc->sc_fs = tf->tf_fs;
197 sc->sc_es = tf->tf_es;
198 sc->sc_ds = tf->tf_ds;
199 sc->sc_eflags = tf->tf_eflags;
200
201 sc->sc_edi = tf->tf_edi;
202 sc->sc_esi = tf->tf_esi;
203 sc->sc_esp = tf->tf_esp;
204 sc->sc_ebp = tf->tf_ebp;
205 sc->sc_ebx = tf->tf_ebx;
206 sc->sc_edx = tf->tf_edx;
207 sc->sc_ecx = tf->tf_ecx;
208 sc->sc_eax = tf->tf_eax;
209 sc->sc_eip = tf->tf_eip;
210 sc->sc_cs = tf->tf_cs;
211 sc->sc_esp_at_signal = tf->tf_esp;
212 sc->sc_ss = tf->tf_ss;
213 sc->sc_err = tf->tf_err;
214 sc->sc_trapno = tf->tf_trapno;
215 sc->sc_cr2 = pcb->pcb_cr2;
216 sc->sc_387 = NULL;
217
218 /* Save signal stack. */
219 /* Linux doesn't save the onstack flag in sigframe */
220
221 /* Save signal mask. */
222 native_to_linux_old_sigset(&sc->sc_mask, mask);
223 }
224
225 static void
226 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
227 {
228 struct lwp *l = curlwp;
229 struct proc *p = l->l_proc;
230 struct trapframe *tf;
231 struct linux_rt_sigframe *fp, frame;
232 int onstack, error;
233 int sig = ksi->ksi_signo;
234 sig_t catcher = SIGACTION(p, sig).sa_handler;
235 struct sigaltstack *sas = &l->l_sigstk;
236
237 tf = l->l_md.md_regs;
238 /* Do we need to jump onto the signal stack? */
239 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
240 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
241
242
243 /* Allocate space for the signal handler context. */
244 if (onstack)
245 fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
246 sas->ss_size);
247 else
248 fp = (struct linux_rt_sigframe *)tf->tf_esp;
249 fp--;
250
251 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
252 onstack, fp, sig, tf->tf_eip,
253 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
254
255 memset(&frame, 0, sizeof(frame));
256
257 /* Build stack frame for signal trampoline. */
258 frame.sf_handler = catcher;
259 frame.sf_sig = native_to_linux_signo[sig];
260 frame.sf_sip = &fp->sf_si;
261 frame.sf_ucp = &fp->sf_uc;
262
263 /*
264 * XXX: the following code assumes that the constants for
265 * siginfo are the same between linux and NetBSD.
266 */
267 native_to_linux_siginfo(&frame.sf_si, &ksi->ksi_info);
268
269 /* Save register context. */
270 linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
271 sendsig_reset(l, sig);
272
273 mutex_exit(p->p_lock);
274 error = copyout(&frame, fp, sizeof(frame));
275 mutex_enter(p->p_lock);
276
277 if (error != 0) {
278 /*
279 * Process has trashed its stack; give it an illegal
280 * instruction to halt it in its tracks.
281 */
282 sigexit(l, SIGILL);
283 /* NOTREACHED */
284 }
285
286 /*
287 * Build context to run handler in.
288 */
289 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
290 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
291 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
292 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
293 (linux_rt_sigcode - linux_sigcode);
294 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
295 tf->tf_eflags &= ~PSL_CLEARSIG;
296 tf->tf_esp = (int)fp;
297 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
298
299 /* Remember that we're now on the signal stack. */
300 if (onstack)
301 sas->ss_flags |= SS_ONSTACK;
302 }
303
304 static void
305 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
306 {
307 struct lwp *l = curlwp;
308 struct proc *p = l->l_proc;
309 struct trapframe *tf;
310 struct linux_sigframe *fp, frame;
311 int onstack, error;
312 int sig = ksi->ksi_signo;
313 sig_t catcher = SIGACTION(p, sig).sa_handler;
314 struct sigaltstack *sas = &l->l_sigstk;
315
316 tf = l->l_md.md_regs;
317
318 /* Do we need to jump onto the signal stack? */
319 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
320 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
321
322 /* Allocate space for the signal handler context. */
323 if (onstack)
324 fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
325 sas->ss_size);
326 else
327 fp = (struct linux_sigframe *)tf->tf_esp;
328 fp--;
329
330 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
331 onstack, fp, sig, tf->tf_eip,
332 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
333
334 memset(&frame, 0, sizeof(frame));
335
336 /* Build stack frame for signal trampoline. */
337 frame.sf_handler = catcher;
338 frame.sf_sig = native_to_linux_signo[sig];
339
340 linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
341 sendsig_reset(l, sig);
342
343 mutex_exit(p->p_lock);
344 error = copyout(&frame, fp, sizeof(frame));
345 mutex_enter(p->p_lock);
346
347 if (error != 0) {
348 /*
349 * Process has trashed its stack; give it an illegal
350 * instruction to halt it in its tracks.
351 */
352 sigexit(l, SIGILL);
353 /* NOTREACHED */
354 }
355
356 /*
357 * Build context to run handler in.
358 */
359 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
360 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
361 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
362 tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
363 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
364 tf->tf_eflags &= ~PSL_CLEARSIG;
365 tf->tf_esp = (int)fp;
366 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
367
368 /* Remember that we're now on the signal stack. */
369 if (onstack)
370 sas->ss_flags |= SS_ONSTACK;
371 }
372
373 /*
374 * System call to cleanup state after a signal
375 * has been taken. Reset signal mask and
376 * stack state from context left by sendsig (above).
377 * Return to previous pc and psl as specified by
378 * context left by sendsig. Check carefully to
379 * make sure that the user has not modified the
380 * psl to gain improper privileges or to cause
381 * a machine fault.
382 */
383 int
384 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
385 {
386 /* {
387 syscallarg(struct linux_ucontext *) ucp;
388 } */
389 struct linux_ucontext context, *ucp = SCARG(uap, ucp);
390 int error;
391
392 /*
393 * The trampoline code hands us the context.
394 * It is unsafe to keep track of it ourselves, in the event that a
395 * program jumps out of a signal handler.
396 */
397 if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
398 return error;
399
400 /* XXX XAX we can do better here by using more of the ucontext */
401 return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
402 }
403
404 int
405 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
406 {
407 /* {
408 syscallarg(struct linux_sigcontext *) scp;
409 } */
410 struct linux_sigcontext context, *scp = SCARG(uap, scp);
411 int error;
412
413 /*
414 * The trampoline code hands us the context.
415 * It is unsafe to keep track of it ourselves, in the event that a
416 * program jumps out of a signal handler.
417 */
418 if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
419 return error;
420 return linux_restore_sigcontext(l, &context, retval);
421 }
422
423 static int
424 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
425 register_t *retval)
426 {
427 struct proc *p = l->l_proc;
428 struct sigaltstack *sas = &l->l_sigstk;
429 struct trapframe *tf;
430 sigset_t mask;
431 ssize_t ss_gap;
432
433 /* Restore register context. */
434 tf = l->l_md.md_regs;
435 DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
436
437 /*
438 * Check for security violations. If we're returning to
439 * protected mode, the CPU will validate the segment registers
440 * automatically and generate a trap on violations. We handle
441 * the trap, rather than doing all of the checking here.
442 */
443 if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
444 !USERMODE(scp->sc_cs))
445 return EINVAL;
446
447 tf->tf_gs = scp->sc_gs;
448 tf->tf_fs = scp->sc_fs;
449 tf->tf_es = scp->sc_es;
450 tf->tf_ds = scp->sc_ds;
451 tf->tf_eflags = scp->sc_eflags;
452
453 tf->tf_edi = scp->sc_edi;
454 tf->tf_esi = scp->sc_esi;
455 tf->tf_ebp = scp->sc_ebp;
456 tf->tf_ebx = scp->sc_ebx;
457 tf->tf_edx = scp->sc_edx;
458 tf->tf_ecx = scp->sc_ecx;
459 tf->tf_eax = scp->sc_eax;
460 tf->tf_eip = scp->sc_eip;
461 tf->tf_cs = scp->sc_cs;
462 tf->tf_esp = scp->sc_esp_at_signal;
463 tf->tf_ss = scp->sc_ss;
464
465 /* Restore signal stack. */
466 /*
467 * Linux really does it this way; it doesn't have space in sigframe
468 * to save the onstack flag.
469 */
470 mutex_enter(p->p_lock);
471 ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
472 if (ss_gap >= 0 && ss_gap < sas->ss_size)
473 sas->ss_flags |= SS_ONSTACK;
474 else
475 sas->ss_flags &= ~SS_ONSTACK;
476
477 /* Restore signal mask. */
478 linux_old_to_native_sigset(&mask, &scp->sc_mask);
479 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
480 mutex_exit(p->p_lock);
481
482 DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
483 return EJUSTRETURN;
484 }
485
486 #ifdef USER_LDT
487
488 static int
489 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
490 register_t *retval)
491 {
492 struct x86_get_ldt_args gl;
493 int error;
494 union descriptor *ldt_buf;
495 size_t sz;
496
497 /*
498 * I've checked the linux code - this function is asymmetric with
499 * linux_write_ldt, and returns raw ldt entries.
500 * NB, the code I saw zerod the spare parts of the user buffer.
501 */
502
503 DPRINTF(("linux_read_ldt!"));
504
505 sz = 8192 * sizeof(*ldt_buf);
506 ldt_buf = kmem_zalloc(sz, KM_SLEEP);
507 gl.start = 0;
508 gl.desc = NULL;
509 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
510 error = x86_get_ldt1(l, &gl, ldt_buf);
511 /* NB gl.num might have changed */
512 if (error == 0) {
513 *retval = gl.num * sizeof(*ldtstore);
514 error = copyout(ldt_buf, SCARG(uap, ptr),
515 gl.num * sizeof *ldt_buf);
516 }
517 kmem_free(ldt_buf, sz);
518
519 return error;
520 }
521
522 struct linux_ldt_info {
523 u_int entry_number;
524 u_long base_addr;
525 u_int limit;
526 u_int seg_32bit:1;
527 u_int contents:2;
528 u_int read_exec_only:1;
529 u_int limit_in_pages:1;
530 u_int seg_not_present:1;
531 u_int useable:1;
532 };
533
534 static int
535 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
536 int oldmode)
537 {
538 struct linux_ldt_info ldt_info;
539 union descriptor d;
540 struct x86_set_ldt_args sl;
541 int error;
542
543 DPRINTF(("linux_write_ldt %d\n", oldmode));
544 if (SCARG(uap, bytecount) != sizeof(ldt_info))
545 return (EINVAL);
546 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
547 return error;
548 if (ldt_info.entry_number >= 8192)
549 return (EINVAL);
550 if (ldt_info.contents == 3) {
551 if (oldmode)
552 return (EINVAL);
553 if (ldt_info.seg_not_present)
554 return (EINVAL);
555 }
556
557 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
558 (oldmode || (ldt_info.contents == 0 &&
559 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
560 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
561 ldt_info.useable == 0))) {
562 /* this means you should zero the ldt */
563 (void)memset(&d, 0, sizeof(d));
564 } else {
565 d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
566 d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
567 d.sd.sd_lolimit = ldt_info.limit & 0xffff;
568 d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
569 d.sd.sd_type = 16 | (ldt_info.contents << 2) |
570 (!ldt_info.read_exec_only << 1);
571 d.sd.sd_dpl = SEL_UPL;
572 d.sd.sd_p = !ldt_info.seg_not_present;
573 d.sd.sd_def32 = ldt_info.seg_32bit;
574 d.sd.sd_gran = ldt_info.limit_in_pages;
575 if (!oldmode)
576 d.sd.sd_xx = ldt_info.useable;
577 else
578 d.sd.sd_xx = 0;
579 }
580 sl.start = ldt_info.entry_number;
581 sl.desc = NULL;
582 sl.num = 1;
583
584 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
585 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
586
587 return x86_set_ldt1(l, &sl, &d);
588 }
589
590 #endif /* USER_LDT */
591
592 int
593 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
594 {
595 /* {
596 syscallarg(int) func;
597 syscallarg(void *) ptr;
598 syscallarg(size_t) bytecount;
599 } */
600
601 switch (SCARG(uap, func)) {
602 #ifdef USER_LDT
603 case 0:
604 return linux_read_ldt(l, (const void *)uap, retval);
605 case 1:
606 return linux_write_ldt(l, (const void *)uap, 1);
607 case 2:
608 #ifdef notyet
609 return linux_read_default_ldt(l, (const void *)uap, retval);
610 #else
611 return (ENOSYS);
612 #endif
613 case 0x11:
614 return linux_write_ldt(l, (const void *)uap, 0);
615 #endif /* USER_LDT */
616
617 default:
618 return (ENOSYS);
619 }
620 }
621
622 /*
623 * XXX Pathetic hack to make svgalib work. This will fake the major
624 * device number of an opened VT so that svgalib likes it. grmbl.
625 * Should probably do it 'wrong the right way' and use a mapping
626 * array for all major device numbers, and map linux_mknod too.
627 */
628 dev_t
629 linux_fakedev(dev_t dev, int raw)
630 {
631 extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
632 const struct cdevsw *cd = cdevsw_lookup(dev);
633
634 if (raw) {
635 #if (NWSDISPLAY > 0)
636 extern const struct cdevsw wsdisplay_cdevsw;
637 if (cd == &wsdisplay_cdevsw)
638 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
639 #endif
640 }
641
642 if (cd == &ptc_cdevsw)
643 return makedev(LINUX_PTC_MAJOR, minor(dev));
644 if (cd == &pts_cdevsw)
645 return makedev(LINUX_PTS_MAJOR, minor(dev));
646
647 return dev;
648 }
649
650 #if (NWSDISPLAY > 0)
651 /*
652 * That's not complete, but enough to get an X server running.
653 */
654 #define NR_KEYS 128
655 static const u_short plain_map[NR_KEYS] = {
656 0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036,
657 0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009,
658 0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
659 0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73,
660 0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b,
661 0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76,
662 0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c,
663 0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
664 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307,
665 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
666 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a,
667 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
668 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
669 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
670 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
671 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
672 }, shift_map[NR_KEYS] = {
673 0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e,
674 0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009,
675 0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49,
676 0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53,
677 0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a,
678 0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56,
679 0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c,
680 0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e,
681 0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307,
682 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
683 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a,
684 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
685 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
686 0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116,
687 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
688 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
689 }, altgr_map[NR_KEYS] = {
690 0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200,
691 0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200,
692 0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
693 0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73,
694 0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200,
695 0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76,
696 0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c,
697 0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510,
698 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911,
699 0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b,
700 0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516,
701 0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
702 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
703 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
704 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
705 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
706 }, ctrl_map[NR_KEYS] = {
707 0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e,
708 0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200,
709 0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009,
710 0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013,
711 0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200,
712 0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016,
713 0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c,
714 0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
715 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307,
716 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
717 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a,
718 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
719 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
720 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
721 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
722 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
723 };
724
725 const u_short * const linux_keytabs[] = {
726 plain_map, shift_map, altgr_map, altgr_map, ctrl_map
727 };
728 #endif
729
730 static struct biosdisk_info *
731 fd2biosinfo(struct proc *p, struct file *fp)
732 {
733 struct vnode *vp;
734 const char *blkname;
735 char diskname[16];
736 int i;
737 struct nativedisk_info *nip;
738 struct disklist *dl = x86_alldisks;
739
740 if (dl == NULL)
741 return NULL;
742 if (fp->f_type != DTYPE_VNODE)
743 return NULL;
744 vp = (struct vnode *)fp->f_data;
745
746 if (vp->v_type != VBLK)
747 return NULL;
748
749 blkname = devsw_blk2name(major(vp->v_rdev));
750 snprintf(diskname, sizeof diskname, "%s%llu", blkname,
751 (unsigned long long)DISKUNIT(vp->v_rdev));
752
753 for (i = 0; i < dl->dl_nnativedisks; i++) {
754 nip = &dl->dl_nativedisks[i];
755 if (strcmp(diskname, nip->ni_devname))
756 continue;
757 if (nip->ni_nmatches != 0)
758 return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
759 }
760
761 return NULL;
762 }
763
764
765 /*
766 * We come here in a last attempt to satisfy a Linux ioctl() call
767 */
768 int
769 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
770 {
771 /* {
772 syscallarg(int) fd;
773 syscallarg(u_long) com;
774 syscallarg(void *) data;
775 } */
776 struct sys_ioctl_args bia;
777 u_long com;
778 int error, error1;
779 #if (NWSDISPLAY > 0)
780 struct vt_mode lvt;
781 struct kbentry kbe;
782 #endif
783 struct linux_hd_geometry hdg;
784 struct linux_hd_big_geometry hdg_big;
785 struct biosdisk_info *bip;
786 file_t *fp;
787 int fd;
788 struct disklabel label;
789 struct partinfo partp;
790 int (*ioctlf)(struct file *, u_long, void *);
791 u_long start, biostotal, realtotal;
792 u_char heads, sectors;
793 u_int cylinders;
794 struct ioctl_pt pt;
795
796 fd = SCARG(uap, fd);
797 SCARG(&bia, fd) = fd;
798 SCARG(&bia, data) = SCARG(uap, data);
799 com = SCARG(uap, com);
800
801 if ((fp = fd_getfile(fd)) == NULL)
802 return (EBADF);
803
804 switch (com) {
805 #if (NWSDISPLAY > 0)
806 case LINUX_KDGKBMODE:
807 com = KDGKBMODE;
808 break;
809 case LINUX_KDSKBMODE:
810 com = KDSKBMODE;
811 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
812 SCARG(&bia, data) = (void *)K_RAW;
813 break;
814 case LINUX_KIOCSOUND:
815 SCARG(&bia, data) =
816 (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
817 /* fall through */
818 case LINUX_KDMKTONE:
819 com = KDMKTONE;
820 break;
821 case LINUX_KDSETMODE:
822 com = KDSETMODE;
823 break;
824 case LINUX_KDGETMODE:
825 /* KD_* values are equal to the wscons numbers */
826 com = WSDISPLAYIO_GMODE;
827 break;
828 case LINUX_KDENABIO:
829 com = KDENABIO;
830 break;
831 case LINUX_KDDISABIO:
832 com = KDDISABIO;
833 break;
834 case LINUX_KDGETLED:
835 com = KDGETLED;
836 break;
837 case LINUX_KDSETLED:
838 com = KDSETLED;
839 break;
840 case LINUX_VT_OPENQRY:
841 com = VT_OPENQRY;
842 break;
843 case LINUX_VT_GETMODE:
844 memset(&lvt, 0, sizeof(lvt));
845 error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
846 if (error != 0)
847 goto out;
848 lvt.relsig = native_to_linux_signo[lvt.relsig];
849 lvt.acqsig = native_to_linux_signo[lvt.acqsig];
850 lvt.frsig = native_to_linux_signo[lvt.frsig];
851 error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
852 goto out;
853 case LINUX_VT_SETMODE:
854 error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
855 if (error != 0)
856 goto out;
857 lvt.relsig = linux_to_native_signo[lvt.relsig];
858 lvt.acqsig = linux_to_native_signo[lvt.acqsig];
859 lvt.frsig = linux_to_native_signo[lvt.frsig];
860 error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
861 goto out;
862 case LINUX_VT_DISALLOCATE:
863 /* XXX should use WSDISPLAYIO_DELSCREEN */
864 error = 0;
865 goto out;
866 case LINUX_VT_RELDISP:
867 com = VT_RELDISP;
868 break;
869 case LINUX_VT_ACTIVATE:
870 com = VT_ACTIVATE;
871 break;
872 case LINUX_VT_WAITACTIVE:
873 com = VT_WAITACTIVE;
874 break;
875 case LINUX_VT_GETSTATE:
876 com = VT_GETSTATE;
877 break;
878 case LINUX_KDGKBTYPE:
879 {
880 static const u_int8_t kb101 = KB_101;
881
882 /* This is what Linux does. */
883 error = copyout(&kb101, SCARG(uap, data), 1);
884 goto out;
885 }
886 case LINUX_KDGKBENT:
887 /*
888 * The Linux KDGKBENT ioctl is different from the
889 * SYSV original. So we handle it in machdep code.
890 * XXX We should use keyboard mapping information
891 * from wsdisplay, but this would be expensive.
892 */
893 if ((error = copyin(SCARG(uap, data), &kbe,
894 sizeof(struct kbentry))))
895 goto out;
896 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
897 || kbe.kb_index >= NR_KEYS) {
898 error = EINVAL;
899 goto out;
900 }
901 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
902 error = copyout(&kbe, SCARG(uap, data),
903 sizeof(struct kbentry));
904 goto out;
905 #endif
906 case LINUX_HDIO_GETGEO:
907 case LINUX_HDIO_GETGEO_BIG:
908 /*
909 * Try to mimic Linux behaviour: return the BIOS geometry
910 * if possible (extending its # of cylinders if it's beyond
911 * the 1023 limit), fall back to the MI geometry (i.e.
912 * the real geometry) if not found, by returning an
913 * error. See common/linux_hdio.c
914 */
915 bip = fd2biosinfo(curproc, fp);
916 ioctlf = fp->f_ops->fo_ioctl;
917 error = ioctlf(fp, DIOCGDINFO, (void *)&label);
918 error1 = ioctlf(fp, DIOCGPARTINFO, (void *)&partp);
919 if (error != 0 && error1 != 0) {
920 error = error1;
921 goto out;
922 }
923 start = error1 != 0 ? partp.pi_offset : 0;
924 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
925 && bip->bi_cyl != 0) {
926 heads = bip->bi_head;
927 sectors = bip->bi_sec;
928 cylinders = bip->bi_cyl;
929 biostotal = heads * sectors * cylinders;
930 realtotal = label.d_ntracks * label.d_nsectors *
931 label.d_ncylinders;
932 if (realtotal > biostotal)
933 cylinders = realtotal / (heads * sectors);
934 } else {
935 heads = label.d_ntracks;
936 cylinders = label.d_ncylinders;
937 sectors = label.d_nsectors;
938 }
939 if (com == LINUX_HDIO_GETGEO) {
940 memset(&hdg, 0, sizeof(hdg));
941 hdg.start = start;
942 hdg.heads = heads;
943 hdg.cylinders = cylinders;
944 hdg.sectors = sectors;
945 error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
946 goto out;
947 } else {
948 memset(&hdg_big, 0, sizeof(hdg_big));
949 hdg_big.start = start;
950 hdg_big.heads = heads;
951 hdg_big.cylinders = cylinders;
952 hdg_big.sectors = sectors;
953 error = copyout(&hdg_big, SCARG(uap, data),
954 sizeof hdg_big);
955 goto out;
956 }
957
958 default:
959 /*
960 * Unknown to us. If it's on a device, just pass it through
961 * using PTIOCLINUX, the device itself might be able to
962 * make some sense of it.
963 * XXX hack: if the function returns EJUSTRETURN,
964 * it has stuffed a sysctl return value in pt.data.
965 */
966 ioctlf = fp->f_ops->fo_ioctl;
967 pt.com = SCARG(uap, com);
968 pt.data = SCARG(uap, data);
969 error = ioctlf(fp, PTIOCLINUX, &pt);
970 if (error == EJUSTRETURN) {
971 retval[0] = (register_t)pt.data;
972 error = 0;
973 }
974
975 if (error == ENOTTY) {
976 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
977 com));
978 }
979 goto out;
980 }
981 SCARG(&bia, com) = com;
982 error = sys_ioctl(curlwp, &bia, retval);
983 out:
984 fd_putfile(fd);
985 return error;
986 }
987
988 /*
989 * Set I/O permissions for a process. Just set the maximum level
990 * right away (ignoring the argument), otherwise we would have
991 * to rely on I/O permission maps, which are not implemented.
992 */
993 int
994 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
995 {
996 /* {
997 syscallarg(int) level;
998 } */
999 struct trapframe *fp = l->l_md.md_regs;
1000
1001 if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
1002 NULL, NULL, NULL, NULL) != 0)
1003 return EPERM;
1004 fp->tf_eflags |= PSL_IOPL;
1005 *retval = 0;
1006 return 0;
1007 }
1008
1009 /*
1010 * See above. If a root process tries to set access to an I/O port,
1011 * just let it have the whole range.
1012 */
1013 int
1014 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
1015 {
1016 /* {
1017 syscallarg(unsigned int) lo;
1018 syscallarg(unsigned int) hi;
1019 syscallarg(int) val;
1020 } */
1021 struct trapframe *fp = l->l_md.md_regs;
1022
1023 if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
1024 KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
1025 NULL, NULL) != 0)
1026 return EPERM;
1027 if (SCARG(uap, val))
1028 fp->tf_eflags |= PSL_IOPL;
1029 *retval = 0;
1030 return 0;
1031 }
1032
1033 int
1034 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
1035 void *arg)
1036 {
1037 return 0;
1038 }
1039
1040 const char *
1041 linux_get_uname_arch(void)
1042 {
1043 static char uname_arch[5] = "i386";
1044
1045 if (uname_arch[1] == '3')
1046 uname_arch[1] += cpu_class;
1047 return uname_arch;
1048 }
1049