linux_machdep.c revision 1.163.4.1 1 /* $NetBSD: linux_machdep.c,v 1.163.4.1 2017/04/27 05:36:35 pgoyette Exp $ */
2
3 /*-
4 * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Frank van der Linden, and by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.163.4.1 2017/04/27 05:36:35 pgoyette Exp $");
34
35 #if defined(_KERNEL_OPT)
36 #include "opt_vm86.h"
37 #include "opt_user_ldt.h"
38 #endif
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/signalvar.h>
43 #include <sys/kernel.h>
44 #include <sys/proc.h>
45 #include <sys/buf.h>
46 #include <sys/reboot.h>
47 #include <sys/conf.h>
48 #include <sys/exec.h>
49 #include <sys/file.h>
50 #include <sys/callout.h>
51 #include <sys/mbuf.h>
52 #include <sys/msgbuf.h>
53 #include <sys/mount.h>
54 #include <sys/vnode.h>
55 #include <sys/device.h>
56 #include <sys/syscallargs.h>
57 #include <sys/filedesc.h>
58 #include <sys/exec_elf.h>
59 #include <sys/disklabel.h>
60 #include <sys/ioctl.h>
61 #include <sys/wait.h>
62 #include <sys/kauth.h>
63 #include <sys/kmem.h>
64
65 #include <miscfs/specfs/specdev.h>
66
67 #include <compat/linux/common/linux_types.h>
68 #include <compat/linux/common/linux_signal.h>
69 #include <compat/linux/common/linux_util.h>
70 #include <compat/linux/common/linux_ioctl.h>
71 #include <compat/linux/common/linux_hdio.h>
72 #include <compat/linux/common/linux_exec.h>
73 #include <compat/linux/common/linux_machdep.h>
74 #include <compat/linux/common/linux_errno.h>
75
76 #include <compat/linux/linux_syscallargs.h>
77
78 #include <sys/cpu.h>
79 #include <machine/cpufunc.h>
80 #include <machine/psl.h>
81 #include <machine/reg.h>
82 #include <machine/segments.h>
83 #include <machine/specialreg.h>
84 #include <machine/sysarch.h>
85 #include <machine/vm86.h>
86 #include <machine/vmparam.h>
87
88 #include <x86/fpu.h>
89
90 /*
91 * To see whether wscons is configured (for virtual console ioctl calls).
92 */
93 #if defined(_KERNEL_OPT)
94 #include "wsdisplay.h"
95 #endif
96 #if (NWSDISPLAY > 0)
97 #include <dev/wscons/wsconsio.h>
98 #include <dev/wscons/wsdisplay_usl_io.h>
99 #if defined(_KERNEL_OPT)
100 #include "opt_xserver.h"
101 #endif
102 #endif
103
104 #ifdef DEBUG_LINUX
105 #define DPRINTF(a) uprintf a
106 #else
107 #define DPRINTF(a)
108 #endif
109
110 extern struct disklist *x86_alldisks;
111
112 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
113 static void linux_save_ucontext(struct lwp *, struct trapframe *,
114 const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
115 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
116 const sigset_t *, struct linux_sigcontext *);
117 static int linux_restore_sigcontext(struct lwp *,
118 struct linux_sigcontext *, register_t *);
119 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
120 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
121
122 extern char linux_sigcode[], linux_rt_sigcode[];
123
124 /*
125 * Deal with some i386-specific things in the Linux emulation code.
126 */
127
128 void
129 linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack)
130 {
131 struct trapframe *tf;
132
133 #ifdef USER_LDT
134 pmap_ldt_cleanup(l);
135 #endif
136
137 fpu_save_area_clear(l, __Linux_NPXCW__);
138
139 tf = l->l_md.md_regs;
140 tf->tf_gs = 0;
141 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
142 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
143 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
144 tf->tf_edi = 0;
145 tf->tf_esi = 0;
146 tf->tf_ebp = 0;
147 tf->tf_ebx = l->l_proc->p_psstrp;
148 tf->tf_edx = 0;
149 tf->tf_ecx = 0;
150 tf->tf_eax = 0;
151 tf->tf_eip = epp->ep_entry;
152 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
153 tf->tf_eflags = PSL_USERSET;
154 tf->tf_esp = stack;
155 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
156 }
157
158 /*
159 * Send an interrupt to process.
160 *
161 * Stack is set up to allow sigcode stored
162 * in u. to call routine, followed by kcall
163 * to sigreturn routine below. After sigreturn
164 * resets the signal mask, the stack, and the
165 * frame pointer, it returns to the user
166 * specified pc, psl.
167 */
168
169 void
170 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
171 {
172 if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
173 linux_rt_sendsig(ksi, mask);
174 else
175 linux_old_sendsig(ksi, mask);
176 }
177
178
179 static void
180 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
181 {
182 uc->uc_flags = 0;
183 uc->uc_link = NULL;
184 native_to_linux_sigaltstack(&uc->uc_stack, sas);
185 linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
186 native_to_linux_sigset(&uc->uc_sigmask, mask);
187 (void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
188 }
189
190 static void
191 linux_save_sigcontext(struct lwp *l, struct trapframe *tf,
192 const sigset_t *mask, struct linux_sigcontext *sc)
193 {
194 struct pcb *pcb = lwp_getpcb(l);
195
196 /* Save register context. */
197 #ifdef VM86
198 if (tf->tf_eflags & PSL_VM) {
199 sc->sc_gs = tf->tf_vm86_gs;
200 sc->sc_fs = tf->tf_vm86_fs;
201 sc->sc_es = tf->tf_vm86_es;
202 sc->sc_ds = tf->tf_vm86_ds;
203 sc->sc_eflags = get_vflags(l);
204 } else
205 #endif
206 {
207 sc->sc_gs = tf->tf_gs;
208 sc->sc_fs = tf->tf_fs;
209 sc->sc_es = tf->tf_es;
210 sc->sc_ds = tf->tf_ds;
211 sc->sc_eflags = tf->tf_eflags;
212 }
213 sc->sc_edi = tf->tf_edi;
214 sc->sc_esi = tf->tf_esi;
215 sc->sc_esp = tf->tf_esp;
216 sc->sc_ebp = tf->tf_ebp;
217 sc->sc_ebx = tf->tf_ebx;
218 sc->sc_edx = tf->tf_edx;
219 sc->sc_ecx = tf->tf_ecx;
220 sc->sc_eax = tf->tf_eax;
221 sc->sc_eip = tf->tf_eip;
222 sc->sc_cs = tf->tf_cs;
223 sc->sc_esp_at_signal = tf->tf_esp;
224 sc->sc_ss = tf->tf_ss;
225 sc->sc_err = tf->tf_err;
226 sc->sc_trapno = tf->tf_trapno;
227 sc->sc_cr2 = pcb->pcb_cr2;
228 sc->sc_387 = NULL;
229
230 /* Save signal stack. */
231 /* Linux doesn't save the onstack flag in sigframe */
232
233 /* Save signal mask. */
234 native_to_linux_old_sigset(&sc->sc_mask, mask);
235 }
236
237 static void
238 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
239 {
240 struct lwp *l = curlwp;
241 struct proc *p = l->l_proc;
242 struct trapframe *tf;
243 struct linux_rt_sigframe *fp, frame;
244 int onstack, error;
245 int sig = ksi->ksi_signo;
246 sig_t catcher = SIGACTION(p, sig).sa_handler;
247 struct sigaltstack *sas = &l->l_sigstk;
248
249 tf = l->l_md.md_regs;
250 /* Do we need to jump onto the signal stack? */
251 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
252 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
253
254
255 /* Allocate space for the signal handler context. */
256 if (onstack)
257 fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
258 sas->ss_size);
259 else
260 fp = (struct linux_rt_sigframe *)tf->tf_esp;
261 fp--;
262
263 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
264 onstack, fp, sig, tf->tf_eip,
265 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
266
267 /* Build stack frame for signal trampoline. */
268 frame.sf_handler = catcher;
269 frame.sf_sig = native_to_linux_signo[sig];
270 frame.sf_sip = &fp->sf_si;
271 frame.sf_ucp = &fp->sf_uc;
272
273 /*
274 * XXX: the following code assumes that the constants for
275 * siginfo are the same between linux and NetBSD.
276 */
277 native_to_linux_siginfo(&frame.sf_si, &ksi->ksi_info);
278
279 /* Save register context. */
280 linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
281 sendsig_reset(l, sig);
282
283 mutex_exit(p->p_lock);
284 error = copyout(&frame, fp, sizeof(frame));
285 mutex_enter(p->p_lock);
286
287 if (error != 0) {
288 /*
289 * Process has trashed its stack; give it an illegal
290 * instruction to halt it in its tracks.
291 */
292 sigexit(l, SIGILL);
293 /* NOTREACHED */
294 }
295
296 /*
297 * Build context to run handler in.
298 */
299 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
300 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
301 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
302 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
303 (linux_rt_sigcode - linux_sigcode);
304 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
305 tf->tf_eflags &= ~PSL_CLEARSIG;
306 tf->tf_esp = (int)fp;
307 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
308
309 /* Remember that we're now on the signal stack. */
310 if (onstack)
311 sas->ss_flags |= SS_ONSTACK;
312 }
313
314 static void
315 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
316 {
317 struct lwp *l = curlwp;
318 struct proc *p = l->l_proc;
319 struct trapframe *tf;
320 struct linux_sigframe *fp, frame;
321 int onstack, error;
322 int sig = ksi->ksi_signo;
323 sig_t catcher = SIGACTION(p, sig).sa_handler;
324 struct sigaltstack *sas = &l->l_sigstk;
325
326 tf = l->l_md.md_regs;
327
328 /* Do we need to jump onto the signal stack? */
329 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
330 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
331
332 /* Allocate space for the signal handler context. */
333 if (onstack)
334 fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
335 sas->ss_size);
336 else
337 fp = (struct linux_sigframe *)tf->tf_esp;
338 fp--;
339
340 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
341 onstack, fp, sig, tf->tf_eip,
342 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
343
344 /* Build stack frame for signal trampoline. */
345 frame.sf_handler = catcher;
346 frame.sf_sig = native_to_linux_signo[sig];
347
348 linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
349 sendsig_reset(l, sig);
350
351 mutex_exit(p->p_lock);
352 error = copyout(&frame, fp, sizeof(frame));
353 mutex_enter(p->p_lock);
354
355 if (error != 0) {
356 /*
357 * Process has trashed its stack; give it an illegal
358 * instruction to halt it in its tracks.
359 */
360 sigexit(l, SIGILL);
361 /* NOTREACHED */
362 }
363
364 /*
365 * Build context to run handler in.
366 */
367 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
368 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
369 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
370 tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
371 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
372 tf->tf_eflags &= ~PSL_CLEARSIG;
373 tf->tf_esp = (int)fp;
374 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
375
376 /* Remember that we're now on the signal stack. */
377 if (onstack)
378 sas->ss_flags |= SS_ONSTACK;
379 }
380
381 /*
382 * System call to cleanup state after a signal
383 * has been taken. Reset signal mask and
384 * stack state from context left by sendsig (above).
385 * Return to previous pc and psl as specified by
386 * context left by sendsig. Check carefully to
387 * make sure that the user has not modified the
388 * psl to gain improper privileges or to cause
389 * a machine fault.
390 */
391 int
392 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
393 {
394 /* {
395 syscallarg(struct linux_ucontext *) ucp;
396 } */
397 struct linux_ucontext context, *ucp = SCARG(uap, ucp);
398 int error;
399
400 /*
401 * The trampoline code hands us the context.
402 * It is unsafe to keep track of it ourselves, in the event that a
403 * program jumps out of a signal handler.
404 */
405 if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
406 return error;
407
408 /* XXX XAX we can do better here by using more of the ucontext */
409 return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
410 }
411
412 int
413 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
414 {
415 /* {
416 syscallarg(struct linux_sigcontext *) scp;
417 } */
418 struct linux_sigcontext context, *scp = SCARG(uap, scp);
419 int error;
420
421 /*
422 * The trampoline code hands us the context.
423 * It is unsafe to keep track of it ourselves, in the event that a
424 * program jumps out of a signal handler.
425 */
426 if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
427 return error;
428 return linux_restore_sigcontext(l, &context, retval);
429 }
430
431 static int
432 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
433 register_t *retval)
434 {
435 struct proc *p = l->l_proc;
436 struct sigaltstack *sas = &l->l_sigstk;
437 struct trapframe *tf;
438 sigset_t mask;
439 ssize_t ss_gap;
440
441 /* Restore register context. */
442 tf = l->l_md.md_regs;
443 DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
444
445 #ifdef VM86
446 if (scp->sc_eflags & PSL_VM) {
447 void syscall_vm86(struct trapframe *);
448
449 tf->tf_vm86_gs = scp->sc_gs;
450 tf->tf_vm86_fs = scp->sc_fs;
451 tf->tf_vm86_es = scp->sc_es;
452 tf->tf_vm86_ds = scp->sc_ds;
453 set_vflags(l, scp->sc_eflags);
454 p->p_md.md_syscall = syscall_vm86;
455 } else
456 #endif
457 {
458 /*
459 * Check for security violations. If we're returning to
460 * protected mode, the CPU will validate the segment registers
461 * automatically and generate a trap on violations. We handle
462 * the trap, rather than doing all of the checking here.
463 */
464 if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
465 !USERMODE(scp->sc_cs, scp->sc_eflags))
466 return EINVAL;
467
468 tf->tf_gs = scp->sc_gs;
469 tf->tf_fs = scp->sc_fs;
470 tf->tf_es = scp->sc_es;
471 tf->tf_ds = scp->sc_ds;
472 #ifdef VM86
473 if (tf->tf_eflags & PSL_VM)
474 (*p->p_emul->e_syscall_intern)(p);
475 #endif
476 tf->tf_eflags = scp->sc_eflags;
477 }
478 tf->tf_edi = scp->sc_edi;
479 tf->tf_esi = scp->sc_esi;
480 tf->tf_ebp = scp->sc_ebp;
481 tf->tf_ebx = scp->sc_ebx;
482 tf->tf_edx = scp->sc_edx;
483 tf->tf_ecx = scp->sc_ecx;
484 tf->tf_eax = scp->sc_eax;
485 tf->tf_eip = scp->sc_eip;
486 tf->tf_cs = scp->sc_cs;
487 tf->tf_esp = scp->sc_esp_at_signal;
488 tf->tf_ss = scp->sc_ss;
489
490 /* Restore signal stack. */
491 /*
492 * Linux really does it this way; it doesn't have space in sigframe
493 * to save the onstack flag.
494 */
495 mutex_enter(p->p_lock);
496 ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
497 if (ss_gap >= 0 && ss_gap < sas->ss_size)
498 sas->ss_flags |= SS_ONSTACK;
499 else
500 sas->ss_flags &= ~SS_ONSTACK;
501
502 /* Restore signal mask. */
503 linux_old_to_native_sigset(&mask, &scp->sc_mask);
504 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
505 mutex_exit(p->p_lock);
506
507 DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
508 return EJUSTRETURN;
509 }
510
511 #ifdef USER_LDT
512
513 static int
514 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
515 register_t *retval)
516 {
517 struct x86_get_ldt_args gl;
518 int error;
519 union descriptor *ldt_buf;
520 size_t sz;
521
522 /*
523 * I've checked the linux code - this function is asymetric with
524 * linux_write_ldt, and returns raw ldt entries.
525 * NB, the code I saw zerod the spare parts of the user buffer.
526 */
527
528 DPRINTF(("linux_read_ldt!"));
529
530 sz = 8192 * sizeof(*ldt_buf);
531 ldt_buf = kmem_zalloc(sz, KM_SLEEP);
532 gl.start = 0;
533 gl.desc = NULL;
534 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
535 error = x86_get_ldt1(l, &gl, ldt_buf);
536 /* NB gl.num might have changed */
537 if (error == 0) {
538 *retval = gl.num * sizeof(*ldtstore);
539 error = copyout(ldt_buf, SCARG(uap, ptr),
540 gl.num * sizeof *ldt_buf);
541 }
542 kmem_free(ldt_buf, sz);
543
544 return error;
545 }
546
547 struct linux_ldt_info {
548 u_int entry_number;
549 u_long base_addr;
550 u_int limit;
551 u_int seg_32bit:1;
552 u_int contents:2;
553 u_int read_exec_only:1;
554 u_int limit_in_pages:1;
555 u_int seg_not_present:1;
556 u_int useable:1;
557 };
558
559 static int
560 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
561 int oldmode)
562 {
563 struct linux_ldt_info ldt_info;
564 union descriptor d;
565 struct x86_set_ldt_args sl;
566 int error;
567
568 DPRINTF(("linux_write_ldt %d\n", oldmode));
569 if (SCARG(uap, bytecount) != sizeof(ldt_info))
570 return (EINVAL);
571 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
572 return error;
573 if (ldt_info.entry_number >= 8192)
574 return (EINVAL);
575 if (ldt_info.contents == 3) {
576 if (oldmode)
577 return (EINVAL);
578 if (ldt_info.seg_not_present)
579 return (EINVAL);
580 }
581
582 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
583 (oldmode || (ldt_info.contents == 0 &&
584 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
585 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
586 ldt_info.useable == 0))) {
587 /* this means you should zero the ldt */
588 (void)memset(&d, 0, sizeof(d));
589 } else {
590 d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
591 d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
592 d.sd.sd_lolimit = ldt_info.limit & 0xffff;
593 d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
594 d.sd.sd_type = 16 | (ldt_info.contents << 2) |
595 (!ldt_info.read_exec_only << 1);
596 d.sd.sd_dpl = SEL_UPL;
597 d.sd.sd_p = !ldt_info.seg_not_present;
598 d.sd.sd_def32 = ldt_info.seg_32bit;
599 d.sd.sd_gran = ldt_info.limit_in_pages;
600 if (!oldmode)
601 d.sd.sd_xx = ldt_info.useable;
602 else
603 d.sd.sd_xx = 0;
604 }
605 sl.start = ldt_info.entry_number;
606 sl.desc = NULL;
607 sl.num = 1;
608
609 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
610 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
611
612 return x86_set_ldt1(l, &sl, &d);
613 }
614
615 #endif /* USER_LDT */
616
617 int
618 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
619 {
620 /* {
621 syscallarg(int) func;
622 syscallarg(void *) ptr;
623 syscallarg(size_t) bytecount;
624 } */
625
626 switch (SCARG(uap, func)) {
627 #ifdef USER_LDT
628 case 0:
629 return linux_read_ldt(l, (const void *)uap, retval);
630 case 1:
631 return linux_write_ldt(l, (const void *)uap, 1);
632 case 2:
633 #ifdef notyet
634 return linux_read_default_ldt(l, (const void *)uap, retval);
635 #else
636 return (ENOSYS);
637 #endif
638 case 0x11:
639 return linux_write_ldt(l, (const void *)uap, 0);
640 #endif /* USER_LDT */
641
642 default:
643 return (ENOSYS);
644 }
645 }
646
647 /*
648 * XXX Pathetic hack to make svgalib work. This will fake the major
649 * device number of an opened VT so that svgalib likes it. grmbl.
650 * Should probably do it 'wrong the right way' and use a mapping
651 * array for all major device numbers, and map linux_mknod too.
652 */
653 dev_t
654 linux_fakedev(dev_t dev, int raw)
655 {
656 dev_t ret;
657 extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
658 const struct cdevsw *cd = cdevsw_lookup_acquire(dev);
659
660 if (raw) {
661 #if (NWSDISPLAY > 0)
662 extern const struct cdevsw wsdisplay_cdevsw;
663 if (cd == &wsdisplay_cdevsw) {
664 cdevsw_release(cd);
665 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
666 }
667 #endif
668 }
669
670 if (cd == &ptc_cdevsw)
671 ret = makedev(LINUX_PTC_MAJOR, minor(dev));
672 else if (cd == &pts_cdevsw)
673 ret = makedev(LINUX_PTS_MAJOR, minor(dev));
674 else
675 ret = dev;
676 cdevsw_release(cd);
677 return ret;
678 }
679
680 #if (NWSDISPLAY > 0)
681 /*
682 * That's not complete, but enough to get an X server running.
683 */
684 #define NR_KEYS 128
685 static const u_short plain_map[NR_KEYS] = {
686 0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036,
687 0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009,
688 0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
689 0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73,
690 0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b,
691 0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76,
692 0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c,
693 0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
694 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307,
695 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
696 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a,
697 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
698 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
699 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
700 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
701 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
702 }, shift_map[NR_KEYS] = {
703 0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e,
704 0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009,
705 0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49,
706 0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53,
707 0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a,
708 0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56,
709 0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c,
710 0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e,
711 0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307,
712 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
713 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a,
714 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
715 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
716 0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116,
717 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
718 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
719 }, altgr_map[NR_KEYS] = {
720 0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200,
721 0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200,
722 0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
723 0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73,
724 0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200,
725 0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76,
726 0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c,
727 0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510,
728 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911,
729 0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b,
730 0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516,
731 0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
732 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
733 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
734 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
735 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
736 }, ctrl_map[NR_KEYS] = {
737 0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e,
738 0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200,
739 0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009,
740 0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013,
741 0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200,
742 0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016,
743 0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c,
744 0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
745 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307,
746 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
747 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a,
748 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
749 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
750 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
751 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
752 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
753 };
754
755 const u_short * const linux_keytabs[] = {
756 plain_map, shift_map, altgr_map, altgr_map, ctrl_map
757 };
758 #endif
759
760 static struct biosdisk_info *
761 fd2biosinfo(struct proc *p, struct file *fp)
762 {
763 struct vnode *vp;
764 const char *blkname;
765 char diskname[16];
766 int i;
767 struct nativedisk_info *nip;
768 struct disklist *dl = x86_alldisks;
769
770 if (dl == NULL)
771 return NULL;
772 if (fp->f_type != DTYPE_VNODE)
773 return NULL;
774 vp = (struct vnode *)fp->f_data;
775
776 if (vp->v_type != VBLK)
777 return NULL;
778
779 blkname = devsw_blk2name(major(vp->v_rdev));
780 snprintf(diskname, sizeof diskname, "%s%llu", blkname,
781 (unsigned long long)DISKUNIT(vp->v_rdev));
782
783 for (i = 0; i < dl->dl_nnativedisks; i++) {
784 nip = &dl->dl_nativedisks[i];
785 if (strcmp(diskname, nip->ni_devname))
786 continue;
787 if (nip->ni_nmatches != 0)
788 return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
789 }
790
791 return NULL;
792 }
793
794
795 /*
796 * We come here in a last attempt to satisfy a Linux ioctl() call
797 */
798 int
799 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
800 {
801 /* {
802 syscallarg(int) fd;
803 syscallarg(u_long) com;
804 syscallarg(void *) data;
805 } */
806 struct sys_ioctl_args bia;
807 u_long com;
808 int error, error1;
809 #if (NWSDISPLAY > 0)
810 struct vt_mode lvt;
811 struct kbentry kbe;
812 #endif
813 struct linux_hd_geometry hdg;
814 struct linux_hd_big_geometry hdg_big;
815 struct biosdisk_info *bip;
816 file_t *fp;
817 int fd;
818 struct disklabel label;
819 struct partinfo partp;
820 int (*ioctlf)(struct file *, u_long, void *);
821 u_long start, biostotal, realtotal;
822 u_char heads, sectors;
823 u_int cylinders;
824 struct ioctl_pt pt;
825
826 fd = SCARG(uap, fd);
827 SCARG(&bia, fd) = fd;
828 SCARG(&bia, data) = SCARG(uap, data);
829 com = SCARG(uap, com);
830
831 if ((fp = fd_getfile(fd)) == NULL)
832 return (EBADF);
833
834 switch (com) {
835 #if (NWSDISPLAY > 0)
836 case LINUX_KDGKBMODE:
837 com = KDGKBMODE;
838 break;
839 case LINUX_KDSKBMODE:
840 com = KDSKBMODE;
841 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
842 SCARG(&bia, data) = (void *)K_RAW;
843 break;
844 case LINUX_KIOCSOUND:
845 SCARG(&bia, data) =
846 (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
847 /* fall through */
848 case LINUX_KDMKTONE:
849 com = KDMKTONE;
850 break;
851 case LINUX_KDSETMODE:
852 com = KDSETMODE;
853 break;
854 case LINUX_KDGETMODE:
855 /* KD_* values are equal to the wscons numbers */
856 com = WSDISPLAYIO_GMODE;
857 break;
858 case LINUX_KDENABIO:
859 com = KDENABIO;
860 break;
861 case LINUX_KDDISABIO:
862 com = KDDISABIO;
863 break;
864 case LINUX_KDGETLED:
865 com = KDGETLED;
866 break;
867 case LINUX_KDSETLED:
868 com = KDSETLED;
869 break;
870 case LINUX_VT_OPENQRY:
871 com = VT_OPENQRY;
872 break;
873 case LINUX_VT_GETMODE:
874 error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
875 if (error != 0)
876 goto out;
877 lvt.relsig = native_to_linux_signo[lvt.relsig];
878 lvt.acqsig = native_to_linux_signo[lvt.acqsig];
879 lvt.frsig = native_to_linux_signo[lvt.frsig];
880 error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
881 goto out;
882 case LINUX_VT_SETMODE:
883 error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
884 if (error != 0)
885 goto out;
886 lvt.relsig = linux_to_native_signo[lvt.relsig];
887 lvt.acqsig = linux_to_native_signo[lvt.acqsig];
888 lvt.frsig = linux_to_native_signo[lvt.frsig];
889 error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
890 goto out;
891 case LINUX_VT_DISALLOCATE:
892 /* XXX should use WSDISPLAYIO_DELSCREEN */
893 error = 0;
894 goto out;
895 case LINUX_VT_RELDISP:
896 com = VT_RELDISP;
897 break;
898 case LINUX_VT_ACTIVATE:
899 com = VT_ACTIVATE;
900 break;
901 case LINUX_VT_WAITACTIVE:
902 com = VT_WAITACTIVE;
903 break;
904 case LINUX_VT_GETSTATE:
905 com = VT_GETSTATE;
906 break;
907 case LINUX_KDGKBTYPE:
908 {
909 static const u_int8_t kb101 = KB_101;
910
911 /* This is what Linux does. */
912 error = copyout(&kb101, SCARG(uap, data), 1);
913 goto out;
914 }
915 case LINUX_KDGKBENT:
916 /*
917 * The Linux KDGKBENT ioctl is different from the
918 * SYSV original. So we handle it in machdep code.
919 * XXX We should use keyboard mapping information
920 * from wsdisplay, but this would be expensive.
921 */
922 if ((error = copyin(SCARG(uap, data), &kbe,
923 sizeof(struct kbentry))))
924 goto out;
925 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
926 || kbe.kb_index >= NR_KEYS) {
927 error = EINVAL;
928 goto out;
929 }
930 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
931 error = copyout(&kbe, SCARG(uap, data),
932 sizeof(struct kbentry));
933 goto out;
934 #endif
935 case LINUX_HDIO_GETGEO:
936 case LINUX_HDIO_GETGEO_BIG:
937 /*
938 * Try to mimic Linux behaviour: return the BIOS geometry
939 * if possible (extending its # of cylinders if it's beyond
940 * the 1023 limit), fall back to the MI geometry (i.e.
941 * the real geometry) if not found, by returning an
942 * error. See common/linux_hdio.c
943 */
944 bip = fd2biosinfo(curproc, fp);
945 ioctlf = fp->f_ops->fo_ioctl;
946 error = ioctlf(fp, DIOCGDINFO, (void *)&label);
947 error1 = ioctlf(fp, DIOCGPARTINFO, (void *)&partp);
948 if (error != 0 && error1 != 0) {
949 error = error1;
950 goto out;
951 }
952 start = error1 != 0 ? partp.pi_offset : 0;
953 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
954 && bip->bi_cyl != 0) {
955 heads = bip->bi_head;
956 sectors = bip->bi_sec;
957 cylinders = bip->bi_cyl;
958 biostotal = heads * sectors * cylinders;
959 realtotal = label.d_ntracks * label.d_nsectors *
960 label.d_ncylinders;
961 if (realtotal > biostotal)
962 cylinders = realtotal / (heads * sectors);
963 } else {
964 heads = label.d_ntracks;
965 cylinders = label.d_ncylinders;
966 sectors = label.d_nsectors;
967 }
968 if (com == LINUX_HDIO_GETGEO) {
969 hdg.start = start;
970 hdg.heads = heads;
971 hdg.cylinders = cylinders;
972 hdg.sectors = sectors;
973 error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
974 goto out;
975 } else {
976 hdg_big.start = start;
977 hdg_big.heads = heads;
978 hdg_big.cylinders = cylinders;
979 hdg_big.sectors = sectors;
980 error = copyout(&hdg_big, SCARG(uap, data),
981 sizeof hdg_big);
982 goto out;
983 }
984
985 default:
986 /*
987 * Unknown to us. If it's on a device, just pass it through
988 * using PTIOCLINUX, the device itself might be able to
989 * make some sense of it.
990 * XXX hack: if the function returns EJUSTRETURN,
991 * it has stuffed a sysctl return value in pt.data.
992 */
993 ioctlf = fp->f_ops->fo_ioctl;
994 pt.com = SCARG(uap, com);
995 pt.data = SCARG(uap, data);
996 error = ioctlf(fp, PTIOCLINUX, &pt);
997 if (error == EJUSTRETURN) {
998 retval[0] = (register_t)pt.data;
999 error = 0;
1000 }
1001
1002 if (error == ENOTTY) {
1003 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1004 com));
1005 }
1006 goto out;
1007 }
1008 SCARG(&bia, com) = com;
1009 error = sys_ioctl(curlwp, &bia, retval);
1010 out:
1011 fd_putfile(fd);
1012 return error;
1013 }
1014
1015 /*
1016 * Set I/O permissions for a process. Just set the maximum level
1017 * right away (ignoring the argument), otherwise we would have
1018 * to rely on I/O permission maps, which are not implemented.
1019 */
1020 int
1021 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
1022 {
1023 /* {
1024 syscallarg(int) level;
1025 } */
1026 struct trapframe *fp = l->l_md.md_regs;
1027
1028 if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
1029 NULL, NULL, NULL, NULL) != 0)
1030 return EPERM;
1031 fp->tf_eflags |= PSL_IOPL;
1032 *retval = 0;
1033 return 0;
1034 }
1035
1036 /*
1037 * See above. If a root process tries to set access to an I/O port,
1038 * just let it have the whole range.
1039 */
1040 int
1041 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
1042 {
1043 /* {
1044 syscallarg(unsigned int) lo;
1045 syscallarg(unsigned int) hi;
1046 syscallarg(int) val;
1047 } */
1048 struct trapframe *fp = l->l_md.md_regs;
1049
1050 if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
1051 KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
1052 NULL, NULL) != 0)
1053 return EPERM;
1054 if (SCARG(uap, val))
1055 fp->tf_eflags |= PSL_IOPL;
1056 *retval = 0;
1057 return 0;
1058 }
1059
1060 int
1061 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
1062 void *arg)
1063 {
1064 return 0;
1065 }
1066
1067 const char *
1068 linux_get_uname_arch(void)
1069 {
1070 static char uname_arch[5] = "i386";
1071
1072 if (uname_arch[1] == '3')
1073 uname_arch[1] += cpu_class;
1074 return uname_arch;
1075 }
1076