linux_machdep.c revision 1.157 1 /* $NetBSD: linux_machdep.c,v 1.157 2014/02/15 10:11:15 dsl Exp $ */
2
3 /*-
4 * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Frank van der Linden, and by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.157 2014/02/15 10:11:15 dsl Exp $");
34
35 #if defined(_KERNEL_OPT)
36 #include "opt_vm86.h"
37 #include "opt_user_ldt.h"
38 #endif
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/signalvar.h>
43 #include <sys/kernel.h>
44 #include <sys/proc.h>
45 #include <sys/buf.h>
46 #include <sys/reboot.h>
47 #include <sys/conf.h>
48 #include <sys/exec.h>
49 #include <sys/file.h>
50 #include <sys/callout.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/msgbuf.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/device.h>
57 #include <sys/syscallargs.h>
58 #include <sys/filedesc.h>
59 #include <sys/exec_elf.h>
60 #include <sys/disklabel.h>
61 #include <sys/ioctl.h>
62 #include <sys/wait.h>
63 #include <sys/kauth.h>
64 #include <sys/kmem.h>
65
66 #include <miscfs/specfs/specdev.h>
67
68 #include <compat/linux/common/linux_types.h>
69 #include <compat/linux/common/linux_signal.h>
70 #include <compat/linux/common/linux_util.h>
71 #include <compat/linux/common/linux_ioctl.h>
72 #include <compat/linux/common/linux_hdio.h>
73 #include <compat/linux/common/linux_exec.h>
74 #include <compat/linux/common/linux_machdep.h>
75 #include <compat/linux/common/linux_errno.h>
76
77 #include <compat/linux/linux_syscallargs.h>
78
79 #include <sys/cpu.h>
80 #include <machine/cpufunc.h>
81 #include <machine/psl.h>
82 #include <machine/reg.h>
83 #include <machine/segments.h>
84 #include <machine/specialreg.h>
85 #include <machine/sysarch.h>
86 #include <machine/vm86.h>
87 #include <machine/vmparam.h>
88
89 /*
90 * To see whether wscons is configured (for virtual console ioctl calls).
91 */
92 #if defined(_KERNEL_OPT)
93 #include "wsdisplay.h"
94 #endif
95 #if (NWSDISPLAY > 0)
96 #include <dev/wscons/wsconsio.h>
97 #include <dev/wscons/wsdisplay_usl_io.h>
98 #if defined(_KERNEL_OPT)
99 #include "opt_xserver.h"
100 #endif
101 #endif
102
103 #ifdef DEBUG_LINUX
104 #define DPRINTF(a) uprintf a
105 #else
106 #define DPRINTF(a)
107 #endif
108
109 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
110 extern struct disklist *x86_alldisks;
111 static void linux_save_ucontext(struct lwp *, struct trapframe *,
112 const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
113 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
114 const sigset_t *, struct linux_sigcontext *);
115 static int linux_restore_sigcontext(struct lwp *,
116 struct linux_sigcontext *, register_t *);
117 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
118 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
119
120 extern char linux_sigcode[], linux_rt_sigcode[];
121
122 /*
123 * Deal with some i386-specific things in the Linux emulation code.
124 */
125
126 void
127 linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack)
128 {
129 struct trapframe *tf;
130
131 #ifdef USER_LDT
132 pmap_ldt_cleanup(l);
133 #endif
134
135 fpu_save_area_clear(l, __Linux_NPXCW__);
136
137 tf = l->l_md.md_regs;
138 tf->tf_gs = 0;
139 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
140 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
141 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
142 tf->tf_edi = 0;
143 tf->tf_esi = 0;
144 tf->tf_ebp = 0;
145 tf->tf_ebx = l->l_proc->p_psstrp;
146 tf->tf_edx = 0;
147 tf->tf_ecx = 0;
148 tf->tf_eax = 0;
149 tf->tf_eip = epp->ep_entry;
150 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
151 tf->tf_eflags = PSL_USERSET;
152 tf->tf_esp = stack;
153 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
154 }
155
156 /*
157 * Send an interrupt to process.
158 *
159 * Stack is set up to allow sigcode stored
160 * in u. to call routine, followed by kcall
161 * to sigreturn routine below. After sigreturn
162 * resets the signal mask, the stack, and the
163 * frame pointer, it returns to the user
164 * specified pc, psl.
165 */
166
167 void
168 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
169 {
170 if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
171 linux_rt_sendsig(ksi, mask);
172 else
173 linux_old_sendsig(ksi, mask);
174 }
175
176
177 static void
178 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
179 {
180 uc->uc_flags = 0;
181 uc->uc_link = NULL;
182 native_to_linux_sigaltstack(&uc->uc_stack, sas);
183 linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
184 native_to_linux_sigset(&uc->uc_sigmask, mask);
185 (void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
186 }
187
188 static void
189 linux_save_sigcontext(struct lwp *l, struct trapframe *tf,
190 const sigset_t *mask, struct linux_sigcontext *sc)
191 {
192 struct pcb *pcb = lwp_getpcb(l);
193
194 /* Save register context. */
195 #ifdef VM86
196 if (tf->tf_eflags & PSL_VM) {
197 sc->sc_gs = tf->tf_vm86_gs;
198 sc->sc_fs = tf->tf_vm86_fs;
199 sc->sc_es = tf->tf_vm86_es;
200 sc->sc_ds = tf->tf_vm86_ds;
201 sc->sc_eflags = get_vflags(l);
202 } else
203 #endif
204 {
205 sc->sc_gs = tf->tf_gs;
206 sc->sc_fs = tf->tf_fs;
207 sc->sc_es = tf->tf_es;
208 sc->sc_ds = tf->tf_ds;
209 sc->sc_eflags = tf->tf_eflags;
210 }
211 sc->sc_edi = tf->tf_edi;
212 sc->sc_esi = tf->tf_esi;
213 sc->sc_esp = tf->tf_esp;
214 sc->sc_ebp = tf->tf_ebp;
215 sc->sc_ebx = tf->tf_ebx;
216 sc->sc_edx = tf->tf_edx;
217 sc->sc_ecx = tf->tf_ecx;
218 sc->sc_eax = tf->tf_eax;
219 sc->sc_eip = tf->tf_eip;
220 sc->sc_cs = tf->tf_cs;
221 sc->sc_esp_at_signal = tf->tf_esp;
222 sc->sc_ss = tf->tf_ss;
223 sc->sc_err = tf->tf_err;
224 sc->sc_trapno = tf->tf_trapno;
225 sc->sc_cr2 = pcb->pcb_cr2;
226 sc->sc_387 = NULL;
227
228 /* Save signal stack. */
229 /* Linux doesn't save the onstack flag in sigframe */
230
231 /* Save signal mask. */
232 native_to_linux_old_sigset(&sc->sc_mask, mask);
233 }
234
235 static void
236 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
237 {
238 struct lwp *l = curlwp;
239 struct proc *p = l->l_proc;
240 struct trapframe *tf;
241 struct linux_rt_sigframe *fp, frame;
242 int onstack, error;
243 int sig = ksi->ksi_signo;
244 sig_t catcher = SIGACTION(p, sig).sa_handler;
245 struct sigaltstack *sas = &l->l_sigstk;
246
247 tf = l->l_md.md_regs;
248 /* Do we need to jump onto the signal stack? */
249 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
250 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
251
252
253 /* Allocate space for the signal handler context. */
254 if (onstack)
255 fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
256 sas->ss_size);
257 else
258 fp = (struct linux_rt_sigframe *)tf->tf_esp;
259 fp--;
260
261 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
262 onstack, fp, sig, tf->tf_eip,
263 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
264
265 /* Build stack frame for signal trampoline. */
266 frame.sf_handler = catcher;
267 frame.sf_sig = native_to_linux_signo[sig];
268 frame.sf_sip = &fp->sf_si;
269 frame.sf_ucp = &fp->sf_uc;
270
271 /*
272 * XXX: the following code assumes that the constants for
273 * siginfo are the same between linux and NetBSD.
274 */
275 native_to_linux_siginfo(&frame.sf_si, &ksi->ksi_info);
276
277 /* Save register context. */
278 linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
279 sendsig_reset(l, sig);
280
281 mutex_exit(p->p_lock);
282 error = copyout(&frame, fp, sizeof(frame));
283 mutex_enter(p->p_lock);
284
285 if (error != 0) {
286 /*
287 * Process has trashed its stack; give it an illegal
288 * instruction to halt it in its tracks.
289 */
290 sigexit(l, SIGILL);
291 /* NOTREACHED */
292 }
293
294 /*
295 * Build context to run handler in.
296 */
297 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
298 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
299 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
300 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
301 (linux_rt_sigcode - linux_sigcode);
302 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
303 tf->tf_eflags &= ~PSL_CLEARSIG;
304 tf->tf_esp = (int)fp;
305 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
306
307 /* Remember that we're now on the signal stack. */
308 if (onstack)
309 sas->ss_flags |= SS_ONSTACK;
310 }
311
312 static void
313 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
314 {
315 struct lwp *l = curlwp;
316 struct proc *p = l->l_proc;
317 struct trapframe *tf;
318 struct linux_sigframe *fp, frame;
319 int onstack, error;
320 int sig = ksi->ksi_signo;
321 sig_t catcher = SIGACTION(p, sig).sa_handler;
322 struct sigaltstack *sas = &l->l_sigstk;
323
324 tf = l->l_md.md_regs;
325
326 /* Do we need to jump onto the signal stack? */
327 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
328 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
329
330 /* Allocate space for the signal handler context. */
331 if (onstack)
332 fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
333 sas->ss_size);
334 else
335 fp = (struct linux_sigframe *)tf->tf_esp;
336 fp--;
337
338 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
339 onstack, fp, sig, tf->tf_eip,
340 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
341
342 /* Build stack frame for signal trampoline. */
343 frame.sf_handler = catcher;
344 frame.sf_sig = native_to_linux_signo[sig];
345
346 linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
347 sendsig_reset(l, sig);
348
349 mutex_exit(p->p_lock);
350 error = copyout(&frame, fp, sizeof(frame));
351 mutex_enter(p->p_lock);
352
353 if (error != 0) {
354 /*
355 * Process has trashed its stack; give it an illegal
356 * instruction to halt it in its tracks.
357 */
358 sigexit(l, SIGILL);
359 /* NOTREACHED */
360 }
361
362 /*
363 * Build context to run handler in.
364 */
365 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
366 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
367 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
368 tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
369 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
370 tf->tf_eflags &= ~PSL_CLEARSIG;
371 tf->tf_esp = (int)fp;
372 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
373
374 /* Remember that we're now on the signal stack. */
375 if (onstack)
376 sas->ss_flags |= SS_ONSTACK;
377 }
378
379 /*
380 * System call to cleanup state after a signal
381 * has been taken. Reset signal mask and
382 * stack state from context left by sendsig (above).
383 * Return to previous pc and psl as specified by
384 * context left by sendsig. Check carefully to
385 * make sure that the user has not modified the
386 * psl to gain improper privileges or to cause
387 * a machine fault.
388 */
389 int
390 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
391 {
392 /* {
393 syscallarg(struct linux_ucontext *) ucp;
394 } */
395 struct linux_ucontext context, *ucp = SCARG(uap, ucp);
396 int error;
397
398 /*
399 * The trampoline code hands us the context.
400 * It is unsafe to keep track of it ourselves, in the event that a
401 * program jumps out of a signal handler.
402 */
403 if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
404 return error;
405
406 /* XXX XAX we can do better here by using more of the ucontext */
407 return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
408 }
409
410 int
411 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
412 {
413 /* {
414 syscallarg(struct linux_sigcontext *) scp;
415 } */
416 struct linux_sigcontext context, *scp = SCARG(uap, scp);
417 int error;
418
419 /*
420 * The trampoline code hands us the context.
421 * It is unsafe to keep track of it ourselves, in the event that a
422 * program jumps out of a signal handler.
423 */
424 if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
425 return error;
426 return linux_restore_sigcontext(l, &context, retval);
427 }
428
429 static int
430 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
431 register_t *retval)
432 {
433 struct proc *p = l->l_proc;
434 struct sigaltstack *sas = &l->l_sigstk;
435 struct trapframe *tf;
436 sigset_t mask;
437 ssize_t ss_gap;
438
439 /* Restore register context. */
440 tf = l->l_md.md_regs;
441 DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
442
443 #ifdef VM86
444 if (scp->sc_eflags & PSL_VM) {
445 void syscall_vm86(struct trapframe *);
446
447 tf->tf_vm86_gs = scp->sc_gs;
448 tf->tf_vm86_fs = scp->sc_fs;
449 tf->tf_vm86_es = scp->sc_es;
450 tf->tf_vm86_ds = scp->sc_ds;
451 set_vflags(l, scp->sc_eflags);
452 p->p_md.md_syscall = syscall_vm86;
453 } else
454 #endif
455 {
456 /*
457 * Check for security violations. If we're returning to
458 * protected mode, the CPU will validate the segment registers
459 * automatically and generate a trap on violations. We handle
460 * the trap, rather than doing all of the checking here.
461 */
462 if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
463 !USERMODE(scp->sc_cs, scp->sc_eflags))
464 return EINVAL;
465
466 tf->tf_gs = scp->sc_gs;
467 tf->tf_fs = scp->sc_fs;
468 tf->tf_es = scp->sc_es;
469 tf->tf_ds = scp->sc_ds;
470 #ifdef VM86
471 if (tf->tf_eflags & PSL_VM)
472 (*p->p_emul->e_syscall_intern)(p);
473 #endif
474 tf->tf_eflags = scp->sc_eflags;
475 }
476 tf->tf_edi = scp->sc_edi;
477 tf->tf_esi = scp->sc_esi;
478 tf->tf_ebp = scp->sc_ebp;
479 tf->tf_ebx = scp->sc_ebx;
480 tf->tf_edx = scp->sc_edx;
481 tf->tf_ecx = scp->sc_ecx;
482 tf->tf_eax = scp->sc_eax;
483 tf->tf_eip = scp->sc_eip;
484 tf->tf_cs = scp->sc_cs;
485 tf->tf_esp = scp->sc_esp_at_signal;
486 tf->tf_ss = scp->sc_ss;
487
488 /* Restore signal stack. */
489 /*
490 * Linux really does it this way; it doesn't have space in sigframe
491 * to save the onstack flag.
492 */
493 mutex_enter(p->p_lock);
494 ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
495 if (ss_gap >= 0 && ss_gap < sas->ss_size)
496 sas->ss_flags |= SS_ONSTACK;
497 else
498 sas->ss_flags &= ~SS_ONSTACK;
499
500 /* Restore signal mask. */
501 linux_old_to_native_sigset(&mask, &scp->sc_mask);
502 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
503 mutex_exit(p->p_lock);
504
505 DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
506 return EJUSTRETURN;
507 }
508
509 #ifdef USER_LDT
510
511 static int
512 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
513 register_t *retval)
514 {
515 struct x86_get_ldt_args gl;
516 int error;
517 union descriptor *ldt_buf;
518 size_t sz;
519
520 /*
521 * I've checked the linux code - this function is asymetric with
522 * linux_write_ldt, and returns raw ldt entries.
523 * NB, the code I saw zerod the spare parts of the user buffer.
524 */
525
526 DPRINTF(("linux_read_ldt!"));
527
528 sz = 8192 * sizeof(*ldt_buf);
529 ldt_buf = kmem_zalloc(sz, KM_SLEEP);
530 gl.start = 0;
531 gl.desc = NULL;
532 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
533 error = x86_get_ldt1(l, &gl, ldt_buf);
534 /* NB gl.num might have changed */
535 if (error == 0) {
536 *retval = gl.num * sizeof *ldt;
537 error = copyout(ldt_buf, SCARG(uap, ptr),
538 gl.num * sizeof *ldt_buf);
539 }
540 kmem_free(ldt_buf, sz);
541
542 return error;
543 }
544
545 struct linux_ldt_info {
546 u_int entry_number;
547 u_long base_addr;
548 u_int limit;
549 u_int seg_32bit:1;
550 u_int contents:2;
551 u_int read_exec_only:1;
552 u_int limit_in_pages:1;
553 u_int seg_not_present:1;
554 u_int useable:1;
555 };
556
557 static int
558 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
559 int oldmode)
560 {
561 struct linux_ldt_info ldt_info;
562 union descriptor d;
563 struct x86_set_ldt_args sl;
564 int error;
565
566 DPRINTF(("linux_write_ldt %d\n", oldmode));
567 if (SCARG(uap, bytecount) != sizeof(ldt_info))
568 return (EINVAL);
569 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
570 return error;
571 if (ldt_info.entry_number >= 8192)
572 return (EINVAL);
573 if (ldt_info.contents == 3) {
574 if (oldmode)
575 return (EINVAL);
576 if (ldt_info.seg_not_present)
577 return (EINVAL);
578 }
579
580 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
581 (oldmode || (ldt_info.contents == 0 &&
582 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
583 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
584 ldt_info.useable == 0))) {
585 /* this means you should zero the ldt */
586 (void)memset(&d, 0, sizeof(d));
587 } else {
588 d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
589 d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
590 d.sd.sd_lolimit = ldt_info.limit & 0xffff;
591 d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
592 d.sd.sd_type = 16 | (ldt_info.contents << 2) |
593 (!ldt_info.read_exec_only << 1);
594 d.sd.sd_dpl = SEL_UPL;
595 d.sd.sd_p = !ldt_info.seg_not_present;
596 d.sd.sd_def32 = ldt_info.seg_32bit;
597 d.sd.sd_gran = ldt_info.limit_in_pages;
598 if (!oldmode)
599 d.sd.sd_xx = ldt_info.useable;
600 else
601 d.sd.sd_xx = 0;
602 }
603 sl.start = ldt_info.entry_number;
604 sl.desc = NULL;
605 sl.num = 1;
606
607 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
608 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
609
610 return x86_set_ldt1(l, &sl, &d);
611 }
612
613 #endif /* USER_LDT */
614
615 int
616 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
617 {
618 /* {
619 syscallarg(int) func;
620 syscallarg(void *) ptr;
621 syscallarg(size_t) bytecount;
622 } */
623
624 switch (SCARG(uap, func)) {
625 #ifdef USER_LDT
626 case 0:
627 return linux_read_ldt(l, (const void *)uap, retval);
628 case 1:
629 return linux_write_ldt(l, (const void *)uap, 1);
630 case 2:
631 #ifdef notyet
632 return linux_read_default_ldt(l, (const void *)uap, retval);
633 #else
634 return (ENOSYS);
635 #endif
636 case 0x11:
637 return linux_write_ldt(l, (const void *)uap, 0);
638 #endif /* USER_LDT */
639
640 default:
641 return (ENOSYS);
642 }
643 }
644
645 /*
646 * XXX Pathetic hack to make svgalib work. This will fake the major
647 * device number of an opened VT so that svgalib likes it. grmbl.
648 * Should probably do it 'wrong the right way' and use a mapping
649 * array for all major device numbers, and map linux_mknod too.
650 */
651 dev_t
652 linux_fakedev(dev_t dev, int raw)
653 {
654 extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
655 const struct cdevsw *cd = cdevsw_lookup(dev);
656
657 if (raw) {
658 #if (NWSDISPLAY > 0)
659 extern const struct cdevsw wsdisplay_cdevsw;
660 if (cd == &wsdisplay_cdevsw)
661 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
662 #endif
663 }
664
665 if (cd == &ptc_cdevsw)
666 return makedev(LINUX_PTC_MAJOR, minor(dev));
667 if (cd == &pts_cdevsw)
668 return makedev(LINUX_PTS_MAJOR, minor(dev));
669
670 return dev;
671 }
672
673 #if (NWSDISPLAY > 0)
674 /*
675 * That's not complete, but enough to get an X server running.
676 */
677 #define NR_KEYS 128
678 static const u_short plain_map[NR_KEYS] = {
679 0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036,
680 0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009,
681 0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
682 0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73,
683 0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b,
684 0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76,
685 0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c,
686 0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
687 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307,
688 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
689 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a,
690 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
691 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
692 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
693 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
694 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
695 }, shift_map[NR_KEYS] = {
696 0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e,
697 0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009,
698 0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49,
699 0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53,
700 0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a,
701 0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56,
702 0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c,
703 0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e,
704 0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307,
705 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
706 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a,
707 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
708 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
709 0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116,
710 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
711 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
712 }, altgr_map[NR_KEYS] = {
713 0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200,
714 0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200,
715 0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
716 0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73,
717 0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200,
718 0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76,
719 0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c,
720 0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510,
721 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911,
722 0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b,
723 0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516,
724 0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
725 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
726 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
727 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
728 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
729 }, ctrl_map[NR_KEYS] = {
730 0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e,
731 0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200,
732 0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009,
733 0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013,
734 0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200,
735 0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016,
736 0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c,
737 0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
738 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307,
739 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
740 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a,
741 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
742 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
743 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
744 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
745 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
746 };
747
748 const u_short * const linux_keytabs[] = {
749 plain_map, shift_map, altgr_map, altgr_map, ctrl_map
750 };
751 #endif
752
753 static struct biosdisk_info *
754 fd2biosinfo(struct proc *p, struct file *fp)
755 {
756 struct vnode *vp;
757 const char *blkname;
758 char diskname[16];
759 int i;
760 struct nativedisk_info *nip;
761 struct disklist *dl = x86_alldisks;
762
763 if (fp->f_type != DTYPE_VNODE)
764 return NULL;
765 vp = (struct vnode *)fp->f_data;
766
767 if (vp->v_type != VBLK)
768 return NULL;
769
770 blkname = devsw_blk2name(major(vp->v_rdev));
771 snprintf(diskname, sizeof diskname, "%s%llu", blkname,
772 (unsigned long long)DISKUNIT(vp->v_rdev));
773
774 for (i = 0; i < dl->dl_nnativedisks; i++) {
775 nip = &dl->dl_nativedisks[i];
776 if (strcmp(diskname, nip->ni_devname))
777 continue;
778 if (nip->ni_nmatches != 0)
779 return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
780 }
781
782 return NULL;
783 }
784
785
786 /*
787 * We come here in a last attempt to satisfy a Linux ioctl() call
788 */
789 int
790 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
791 {
792 /* {
793 syscallarg(int) fd;
794 syscallarg(u_long) com;
795 syscallarg(void *) data;
796 } */
797 struct sys_ioctl_args bia;
798 u_long com;
799 int error, error1;
800 #if (NWSDISPLAY > 0)
801 struct vt_mode lvt;
802 struct kbentry kbe;
803 #endif
804 struct linux_hd_geometry hdg;
805 struct linux_hd_big_geometry hdg_big;
806 struct biosdisk_info *bip;
807 file_t *fp;
808 int fd;
809 struct disklabel label, *labp;
810 struct partinfo partp;
811 int (*ioctlf)(struct file *, u_long, void *);
812 u_long start, biostotal, realtotal;
813 u_char heads, sectors;
814 u_int cylinders;
815 struct ioctl_pt pt;
816
817 fd = SCARG(uap, fd);
818 SCARG(&bia, fd) = fd;
819 SCARG(&bia, data) = SCARG(uap, data);
820 com = SCARG(uap, com);
821
822 if ((fp = fd_getfile(fd)) == NULL)
823 return (EBADF);
824
825 switch (com) {
826 #if (NWSDISPLAY > 0)
827 case LINUX_KDGKBMODE:
828 com = KDGKBMODE;
829 break;
830 case LINUX_KDSKBMODE:
831 com = KDSKBMODE;
832 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
833 SCARG(&bia, data) = (void *)K_RAW;
834 break;
835 case LINUX_KIOCSOUND:
836 SCARG(&bia, data) =
837 (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
838 /* fall through */
839 case LINUX_KDMKTONE:
840 com = KDMKTONE;
841 break;
842 case LINUX_KDSETMODE:
843 com = KDSETMODE;
844 break;
845 case LINUX_KDGETMODE:
846 /* KD_* values are equal to the wscons numbers */
847 com = WSDISPLAYIO_GMODE;
848 break;
849 case LINUX_KDENABIO:
850 com = KDENABIO;
851 break;
852 case LINUX_KDDISABIO:
853 com = KDDISABIO;
854 break;
855 case LINUX_KDGETLED:
856 com = KDGETLED;
857 break;
858 case LINUX_KDSETLED:
859 com = KDSETLED;
860 break;
861 case LINUX_VT_OPENQRY:
862 com = VT_OPENQRY;
863 break;
864 case LINUX_VT_GETMODE:
865 error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
866 if (error != 0)
867 goto out;
868 lvt.relsig = native_to_linux_signo[lvt.relsig];
869 lvt.acqsig = native_to_linux_signo[lvt.acqsig];
870 lvt.frsig = native_to_linux_signo[lvt.frsig];
871 error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
872 goto out;
873 case LINUX_VT_SETMODE:
874 error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
875 if (error != 0)
876 goto out;
877 lvt.relsig = linux_to_native_signo[lvt.relsig];
878 lvt.acqsig = linux_to_native_signo[lvt.acqsig];
879 lvt.frsig = linux_to_native_signo[lvt.frsig];
880 error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
881 goto out;
882 case LINUX_VT_DISALLOCATE:
883 /* XXX should use WSDISPLAYIO_DELSCREEN */
884 error = 0;
885 goto out;
886 case LINUX_VT_RELDISP:
887 com = VT_RELDISP;
888 break;
889 case LINUX_VT_ACTIVATE:
890 com = VT_ACTIVATE;
891 break;
892 case LINUX_VT_WAITACTIVE:
893 com = VT_WAITACTIVE;
894 break;
895 case LINUX_VT_GETSTATE:
896 com = VT_GETSTATE;
897 break;
898 case LINUX_KDGKBTYPE:
899 {
900 static const u_int8_t kb101 = KB_101;
901
902 /* This is what Linux does. */
903 error = copyout(&kb101, SCARG(uap, data), 1);
904 goto out;
905 }
906 case LINUX_KDGKBENT:
907 /*
908 * The Linux KDGKBENT ioctl is different from the
909 * SYSV original. So we handle it in machdep code.
910 * XXX We should use keyboard mapping information
911 * from wsdisplay, but this would be expensive.
912 */
913 if ((error = copyin(SCARG(uap, data), &kbe,
914 sizeof(struct kbentry))))
915 goto out;
916 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
917 || kbe.kb_index >= NR_KEYS) {
918 error = EINVAL;
919 goto out;
920 }
921 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
922 error = copyout(&kbe, SCARG(uap, data),
923 sizeof(struct kbentry));
924 goto out;
925 #endif
926 case LINUX_HDIO_GETGEO:
927 case LINUX_HDIO_GETGEO_BIG:
928 /*
929 * Try to mimic Linux behaviour: return the BIOS geometry
930 * if possible (extending its # of cylinders if it's beyond
931 * the 1023 limit), fall back to the MI geometry (i.e.
932 * the real geometry) if not found, by returning an
933 * error. See common/linux_hdio.c
934 */
935 bip = fd2biosinfo(curproc, fp);
936 ioctlf = fp->f_ops->fo_ioctl;
937 error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label);
938 error1 = ioctlf(fp, DIOCGPART, (void *)&partp);
939 if (error != 0 && error1 != 0) {
940 error = error1;
941 goto out;
942 }
943 labp = error != 0 ? &label : partp.disklab;
944 start = error1 != 0 ? partp.part->p_offset : 0;
945 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
946 && bip->bi_cyl != 0) {
947 heads = bip->bi_head;
948 sectors = bip->bi_sec;
949 cylinders = bip->bi_cyl;
950 biostotal = heads * sectors * cylinders;
951 realtotal = labp->d_ntracks * labp->d_nsectors *
952 labp->d_ncylinders;
953 if (realtotal > biostotal)
954 cylinders = realtotal / (heads * sectors);
955 } else {
956 heads = labp->d_ntracks;
957 cylinders = labp->d_ncylinders;
958 sectors = labp->d_nsectors;
959 }
960 if (com == LINUX_HDIO_GETGEO) {
961 hdg.start = start;
962 hdg.heads = heads;
963 hdg.cylinders = cylinders;
964 hdg.sectors = sectors;
965 error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
966 goto out;
967 } else {
968 hdg_big.start = start;
969 hdg_big.heads = heads;
970 hdg_big.cylinders = cylinders;
971 hdg_big.sectors = sectors;
972 error = copyout(&hdg_big, SCARG(uap, data),
973 sizeof hdg_big);
974 goto out;
975 }
976
977 default:
978 /*
979 * Unknown to us. If it's on a device, just pass it through
980 * using PTIOCLINUX, the device itself might be able to
981 * make some sense of it.
982 * XXX hack: if the function returns EJUSTRETURN,
983 * it has stuffed a sysctl return value in pt.data.
984 */
985 ioctlf = fp->f_ops->fo_ioctl;
986 pt.com = SCARG(uap, com);
987 pt.data = SCARG(uap, data);
988 error = ioctlf(fp, PTIOCLINUX, &pt);
989 if (error == EJUSTRETURN) {
990 retval[0] = (register_t)pt.data;
991 error = 0;
992 }
993
994 if (error == ENOTTY) {
995 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
996 com));
997 }
998 goto out;
999 }
1000 SCARG(&bia, com) = com;
1001 error = sys_ioctl(curlwp, &bia, retval);
1002 out:
1003 fd_putfile(fd);
1004 return error;
1005 }
1006
1007 /*
1008 * Set I/O permissions for a process. Just set the maximum level
1009 * right away (ignoring the argument), otherwise we would have
1010 * to rely on I/O permission maps, which are not implemented.
1011 */
1012 int
1013 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
1014 {
1015 /* {
1016 syscallarg(int) level;
1017 } */
1018 struct trapframe *fp = l->l_md.md_regs;
1019
1020 if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
1021 NULL, NULL, NULL, NULL) != 0)
1022 return EPERM;
1023 fp->tf_eflags |= PSL_IOPL;
1024 *retval = 0;
1025 return 0;
1026 }
1027
1028 /*
1029 * See above. If a root process tries to set access to an I/O port,
1030 * just let it have the whole range.
1031 */
1032 int
1033 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
1034 {
1035 /* {
1036 syscallarg(unsigned int) lo;
1037 syscallarg(unsigned int) hi;
1038 syscallarg(int) val;
1039 } */
1040 struct trapframe *fp = l->l_md.md_regs;
1041
1042 if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
1043 KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
1044 NULL, NULL) != 0)
1045 return EPERM;
1046 if (SCARG(uap, val))
1047 fp->tf_eflags |= PSL_IOPL;
1048 *retval = 0;
1049 return 0;
1050 }
1051
1052 int
1053 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
1054 void *arg)
1055 {
1056 return 0;
1057 }
1058
1059 const char *
1060 linux_get_uname_arch(void)
1061 {
1062 static char uname_arch[5] = "i386";
1063
1064 if (uname_arch[1] == '3')
1065 uname_arch[1] += cpu_class;
1066 return uname_arch;
1067 }
1068