linux_machdep.c revision 1.88 1 /* $NetBSD: linux_machdep.c,v 1.88 2003/06/28 14:21:20 darrenr Exp $ */
2
3 /*-
4 * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Frank van der Linden.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.88 2003/06/28 14:21:20 darrenr Exp $");
41
42 #if defined(_KERNEL_OPT)
43 #include "opt_vm86.h"
44 #include "opt_user_ldt.h"
45 #endif
46
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/signalvar.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/user.h>
53 #include <sys/buf.h>
54 #include <sys/reboot.h>
55 #include <sys/conf.h>
56 #include <sys/exec.h>
57 #include <sys/file.h>
58 #include <sys/callout.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/msgbuf.h>
62 #include <sys/mount.h>
63 #include <sys/vnode.h>
64 #include <sys/device.h>
65 #include <sys/sa.h>
66 #include <sys/syscallargs.h>
67 #include <sys/filedesc.h>
68 #include <sys/exec_elf.h>
69 #include <sys/disklabel.h>
70 #include <sys/ioctl.h>
71 #include <miscfs/specfs/specdev.h>
72
73 #include <compat/linux/common/linux_types.h>
74 #include <compat/linux/common/linux_signal.h>
75 #include <compat/linux/common/linux_util.h>
76 #include <compat/linux/common/linux_ioctl.h>
77 #include <compat/linux/common/linux_hdio.h>
78 #include <compat/linux/common/linux_exec.h>
79 #include <compat/linux/common/linux_machdep.h>
80
81 #include <compat/linux/linux_syscallargs.h>
82
83 #include <machine/cpu.h>
84 #include <machine/cpufunc.h>
85 #include <machine/psl.h>
86 #include <machine/reg.h>
87 #include <machine/segments.h>
88 #include <machine/specialreg.h>
89 #include <machine/sysarch.h>
90 #include <machine/vm86.h>
91 #include <machine/vmparam.h>
92
93 /*
94 * To see whether wscons is configured (for virtual console ioctl calls).
95 */
96 #if defined(_KERNEL_OPT)
97 #include "wsdisplay.h"
98 #endif
99 #if (NWSDISPLAY > 0)
100 #include <dev/wscons/wsconsio.h>
101 #include <dev/wscons/wsdisplay_usl_io.h>
102 #if defined(_KERNEL_OPT)
103 #include "opt_xserver.h"
104 #endif
105 #endif
106
#ifdef USER_LDT
#include <machine/cpu.h>
/*
 * LDT read/write helpers dispatched from linux_sys_modify_ldt();
 * they are only built when the kernel is configured with USER_LDT.
 */
int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    register_t *));
int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    register_t *));
#endif

/*
 * DPRINTF takes a parenthesized printf-style argument list.  It expands
 * to a uprintf() call under DEBUG_LINUX and to nothing otherwise.
 */
#ifdef DEBUG_LINUX
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif

/* Map an open block device to its BIOS disk information, if any. */
static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
/* Disk list searched by fd2biosinfo(); defined outside this file. */
extern struct disklist *i386_alldisks;
/* Fill a Linux sigcontext from a trapframe and a native signal mask. */
static void linux_savecontext __P((struct lwp *, struct trapframe *,
    sigset_t *, struct linux_sigcontext *));
/* Signal delivery for SA_SIGINFO handlers and for all other handlers. */
static void linux_rt_sendsig __P((int, sigset_t *, u_long));
static void linux_old_sendsig __P((int, sigset_t *, u_long));

/*
 * Signal trampolines.  linux_rt_sendsig() adds the distance between the
 * two symbols to the process' sigcode address to find the rt trampoline.
 */
extern char linux_sigcode[], linux_rt_sigcode[];
129 /*
130 * Deal with some i386-specific things in the Linux emulation code.
131 */
132
133 void
134 linux_setregs(l, epp, stack)
135 struct lwp *l;
136 struct exec_package *epp;
137 u_long stack;
138 {
139 struct pcb *pcb = &l->l_addr->u_pcb;
140 struct trapframe *tf;
141
142 #if NNPX > 0
143 /* If we were using the FPU, forget about it. */
144 if (npxproc == l)
145 npxdrop();
146 #endif
147
148 #ifdef USER_LDT
149 pmap_ldt_cleanup(l);
150 #endif
151
152 l->l_md.md_flags &= ~MDP_USEDFPU;
153
154 if (i386_use_fxsave) {
155 pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
156 pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
157 } else
158 pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
159
160 tf = l->l_md.md_regs;
161 tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
162 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
163 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
164 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
165 tf->tf_edi = 0;
166 tf->tf_esi = 0;
167 tf->tf_ebp = 0;
168 tf->tf_ebx = (int)l->l_proc->p_psstr;
169 tf->tf_edx = 0;
170 tf->tf_ecx = 0;
171 tf->tf_eax = 0;
172 tf->tf_eip = epp->ep_entry;
173 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
174 tf->tf_eflags = PSL_USERSET;
175 tf->tf_esp = stack;
176 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
177 }
178
179 /*
180 * Send an interrupt to process.
181 *
182 * Stack is set up to allow sigcode stored
183 * in u. to call routine, followed by kcall
184 * to sigreturn routine below. After sigreturn
185 * resets the signal mask, the stack, and the
186 * frame pointer, it returns to the user
187 * specified pc, psl.
188 */
189
190 void
191 linux_sendsig(sig, mask, code)
192 int sig;
193 sigset_t *mask;
194 u_long code;
195 {
196 if (SIGACTION(curproc, sig).sa_flags & SA_SIGINFO)
197 linux_rt_sendsig(sig, mask, code);
198 else
199 linux_old_sendsig(sig, mask, code);
200 }
201
202
203 static void
204 linux_savecontext(l, tf, mask, sc)
205 struct lwp *l;
206 struct trapframe *tf;
207 sigset_t *mask;
208 struct linux_sigcontext *sc;
209 {
210 /* Save register context. */
211 #ifdef VM86
212 if (tf->tf_eflags & PSL_VM) {
213 sc->sc_gs = tf->tf_vm86_gs;
214 sc->sc_fs = tf->tf_vm86_fs;
215 sc->sc_es = tf->tf_vm86_es;
216 sc->sc_ds = tf->tf_vm86_ds;
217 sc->sc_eflags = get_vflags(l);
218 } else
219 #endif
220 {
221 sc->sc_gs = tf->tf_gs;
222 sc->sc_fs = tf->tf_fs;
223 sc->sc_es = tf->tf_es;
224 sc->sc_ds = tf->tf_ds;
225 sc->sc_eflags = tf->tf_eflags;
226 }
227 sc->sc_edi = tf->tf_edi;
228 sc->sc_esi = tf->tf_esi;
229 sc->sc_esp = tf->tf_esp;
230 sc->sc_ebp = tf->tf_ebp;
231 sc->sc_ebx = tf->tf_ebx;
232 sc->sc_edx = tf->tf_edx;
233 sc->sc_ecx = tf->tf_ecx;
234 sc->sc_eax = tf->tf_eax;
235 sc->sc_eip = tf->tf_eip;
236 sc->sc_cs = tf->tf_cs;
237 sc->sc_esp_at_signal = tf->tf_esp;
238 sc->sc_ss = tf->tf_ss;
239 sc->sc_err = tf->tf_err;
240 sc->sc_trapno = tf->tf_trapno;
241 sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
242 sc->sc_387 = NULL;
243
244 /* Save signal stack. */
245 /* Linux doesn't save the onstack flag in sigframe */
246
247 /* Save signal mask. */
248 native_to_linux_old_sigset(&sc->sc_mask, mask);
249 }
250
251 static void
252 linux_rt_sendsig(sig, mask, code)
253 int sig;
254 sigset_t *mask;
255 u_long code;
256 {
257 struct lwp *l = curlwp;
258 struct proc *p = l->l_proc;
259 struct trapframe *tf;
260 struct linux_rt_sigframe *fp, frame;
261 int onstack;
262 sig_t catcher = SIGACTION(p, sig).sa_handler;
263 struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
264
265 tf = l->l_md.md_regs;
266 /* Do we need to jump onto the signal stack? */
267 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
268 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
269
270
271 /* Allocate space for the signal handler context. */
272 if (onstack)
273 fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
274 sas->ss_size);
275 else
276 fp = (struct linux_rt_sigframe *)tf->tf_esp;
277 fp--;
278
279 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x\n", onstack, fp,
280 sig, tf->tf_eip));
281
282 /* Build stack frame for signal trampoline. */
283 frame.sf_handler = catcher;
284 frame.sf_sig = native_to_linux_signo[sig];
285 frame.sf_sip = &fp->sf_si;
286 frame.sf_scp = &fp->sf_sc;
287
288 /*
289 * XXX: zero siginfo out until we provide more info.
290 */
291 (void)memset(&frame.sf_si, 0, sizeof(frame.sf_si));
292
293 /* Save register context. */
294 linux_savecontext(l, tf, mask, &frame.sf_sc);
295
296 if (copyout(&frame, fp, sizeof(frame)) != 0) {
297 /*
298 * Process has trashed its stack; give it an illegal
299 * instruction to halt it in its tracks.
300 */
301 sigexit(l, SIGILL);
302 /* NOTREACHED */
303 }
304
305 /*
306 * Build context to run handler in.
307 */
308 tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
309 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
310 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
311 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
312 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
313 (linux_rt_sigcode - linux_sigcode);
314 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
315 tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
316 tf->tf_esp = (int)fp;
317 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
318
319 /* Remember that we're now on the signal stack. */
320 if (onstack)
321 sas->ss_flags |= SS_ONSTACK;
322 }
323
324 static void
325 linux_old_sendsig(sig, mask, code)
326 int sig;
327 sigset_t *mask;
328 u_long code;
329 {
330 struct lwp *l = curlwp;
331 struct proc *p = l->l_proc;
332 struct trapframe *tf;
333 struct linux_sigframe *fp, frame;
334 int onstack;
335 sig_t catcher = SIGACTION(p, sig).sa_handler;
336 struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
337
338 tf = l->l_md.md_regs;
339
340 /* Do we need to jump onto the signal stack? */
341 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
342 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
343
344 /* Allocate space for the signal handler context. */
345 if (onstack)
346 fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
347 sas->ss_size);
348 else
349 fp = (struct linux_sigframe *)tf->tf_esp;
350 fp--;
351
352 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x\n",
353 onstack, fp, sig, tf->tf_eip));
354
355 /* Build stack frame for signal trampoline. */
356 frame.sf_handler = catcher;
357 frame.sf_sig = native_to_linux_signo[sig];
358
359 linux_savecontext(l, tf, mask, &frame.sf_sc);
360
361 if (copyout(&frame, fp, sizeof(frame)) != 0) {
362 /*
363 * Process has trashed its stack; give it an illegal
364 * instruction to halt it in its tracks.
365 */
366 sigexit(l, SIGILL);
367 /* NOTREACHED */
368 }
369
370 /*
371 * Build context to run handler in.
372 */
373 tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
374 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
375 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
376 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
377 tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
378 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
379 tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
380 tf->tf_esp = (int)fp;
381 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
382
383 /* Remember that we're now on the signal stack. */
384 if (onstack)
385 sas->ss_flags |= SS_ONSTACK;
386 }
387
/*
 * System calls to clean up state after a signal has been taken.
 * They reset the signal mask and stack state from the context left by
 * sendsig (above) and return to the pc and psl recorded there, after
 * checking that the user has not modified the psl to gain improper
 * privileges or to cause a machine fault.
 *
 * The rt variant is not implemented yet: it ignores its arguments and
 * always fails with ENOSYS.
 */
int
linux_sys_rt_sigreturn(l, v, retval)
	struct lwp *l;
	void *v;
	register_t *retval;
{
	/* XXX XAX write me */
	return ENOSYS;
}
407
408 int
409 linux_sys_sigreturn(l, v, retval)
410 struct lwp *l;
411 void *v;
412 register_t *retval;
413 {
414 struct linux_sys_sigreturn_args /* {
415 syscallarg(struct linux_sigcontext *) scp;
416 } */ *uap = v;
417 struct proc *p = l->l_proc;
418 struct linux_sigcontext *scp, context;
419 struct trapframe *tf;
420 sigset_t mask;
421 ssize_t ss_gap;
422 struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
423
424 /*
425 * The trampoline code hands us the context.
426 * It is unsafe to keep track of it ourselves, in the event that a
427 * program jumps out of a signal handler.
428 */
429 scp = SCARG(uap, scp);
430 if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
431 return EFAULT;
432
433 /* Restore register context. */
434 tf = l->l_md.md_regs;
435
436 DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
437 #ifdef VM86
438 if (context.sc_eflags & PSL_VM) {
439 void syscall_vm86 __P((struct trapframe));
440
441 tf->tf_vm86_gs = context.sc_gs;
442 tf->tf_vm86_fs = context.sc_fs;
443 tf->tf_vm86_es = context.sc_es;
444 tf->tf_vm86_ds = context.sc_ds;
445 set_vflags(l, context.sc_eflags);
446 p->p_md.md_syscall = syscall_vm86;
447 } else
448 #endif
449 {
450 /*
451 * Check for security violations. If we're returning to
452 * protected mode, the CPU will validate the segment registers
453 * automatically and generate a trap on violations. We handle
454 * the trap, rather than doing all of the checking here.
455 */
456 if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
457 !USERMODE(context.sc_cs, context.sc_eflags))
458 return EINVAL;
459
460 tf->tf_gs = context.sc_gs;
461 tf->tf_fs = context.sc_fs;
462 tf->tf_es = context.sc_es;
463 tf->tf_ds = context.sc_ds;
464 #ifdef VM86
465 if (tf->tf_eflags & PSL_VM)
466 (*p->p_emul->e_syscall_intern)(p);
467 #endif
468 tf->tf_eflags = context.sc_eflags;
469 }
470 tf->tf_edi = context.sc_edi;
471 tf->tf_esi = context.sc_esi;
472 tf->tf_ebp = context.sc_ebp;
473 tf->tf_ebx = context.sc_ebx;
474 tf->tf_edx = context.sc_edx;
475 tf->tf_ecx = context.sc_ecx;
476 tf->tf_eax = context.sc_eax;
477 tf->tf_eip = context.sc_eip;
478 tf->tf_cs = context.sc_cs;
479 tf->tf_esp = context.sc_esp_at_signal;
480 tf->tf_ss = context.sc_ss;
481
482 /* Restore signal stack. */
483 /*
484 * Linux really does it this way; it doesn't have space in sigframe
485 * to save the onstack flag.
486 */
487 ss_gap = (ssize_t)
488 ((caddr_t) context.sc_esp_at_signal - (caddr_t) sas->ss_sp);
489 if (ss_gap >= 0 && ss_gap < sas->ss_size)
490 sas->ss_flags |= SS_ONSTACK;
491 else
492 sas->ss_flags &= ~SS_ONSTACK;
493
494 /* Restore signal mask. */
495 linux_old_to_native_sigset(&mask, &context.sc_mask);
496 (void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
497 DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
498 return EJUSTRETURN;
499 }
500
501 #ifdef USER_LDT
502
503 int
504 linux_read_ldt(l, uap, retval)
505 struct lwp *l;
506 struct linux_sys_modify_ldt_args /* {
507 syscallarg(int) func;
508 syscallarg(void *) ptr;
509 syscallarg(size_t) bytecount;
510 } */ *uap;
511 register_t *retval;
512 {
513 struct proc *p = l->l_proc;
514 struct i386_get_ldt_args gl;
515 int error;
516 caddr_t sg;
517 char *parms;
518
519 DPRINTF(("linux_read_ldt!"));
520 sg = stackgap_init(p, 0);
521
522 gl.start = 0;
523 gl.desc = SCARG(uap, ptr);
524 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
525
526 parms = stackgap_alloc(p, &sg, sizeof(gl));
527
528 if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
529 return (error);
530
531 if ((error = i386_get_ldt(l, parms, retval)) != 0)
532 return (error);
533
534 *retval *= sizeof(union descriptor);
535 return (0);
536 }
537
/*
 * Argument block that linux_write_ldt() copies in from the caller.
 * The notes below describe how linux_write_ldt() uses each field.
 */
struct linux_ldt_info {
	u_int entry_number;		/* LDT slot to set; must be < 8192 */
	u_long base_addr;		/* split into sd_lobase / sd_hibase */
	u_int limit;			/* split into sd_lolimit / sd_hilimit */
	u_int seg_32bit:1;		/* copied to sd_def32 */
	u_int contents:2;		/* shifted into sd_type; 3 is restricted */
	u_int read_exec_only:1;		/* inverted into bit 1 of sd_type */
	u_int limit_in_pages:1;		/* copied to sd_gran */
	u_int seg_not_present:1;	/* inverted into sd_p */
	u_int useable:1;		/* copied to sd_xx unless in old mode */
};
549
550 int
551 linux_write_ldt(l, uap, retval)
552 struct lwp *l;
553 struct linux_sys_modify_ldt_args /* {
554 syscallarg(int) func;
555 syscallarg(void *) ptr;
556 syscallarg(size_t) bytecount;
557 } */ *uap;
558 register_t *retval;
559 {
560 struct proc *p = l->l_proc;
561 struct linux_ldt_info ldt_info;
562 struct segment_descriptor sd;
563 struct i386_set_ldt_args sl;
564 int error;
565 caddr_t sg;
566 char *parms;
567 int oldmode = (int)retval[0];
568
569 DPRINTF(("linux_write_ldt %d\n", oldmode));
570 if (SCARG(uap, bytecount) != sizeof(ldt_info))
571 return (EINVAL);
572 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
573 return error;
574 if (ldt_info.entry_number >= 8192)
575 return (EINVAL);
576 if (ldt_info.contents == 3) {
577 if (oldmode)
578 return (EINVAL);
579 if (ldt_info.seg_not_present)
580 return (EINVAL);
581 }
582
583 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
584 (oldmode || (ldt_info.contents == 0 &&
585 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
586 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
587 ldt_info.useable == 0))) {
588 /* this means you should zero the ldt */
589 (void)memset(&sd, 0, sizeof(sd));
590 } else {
591 sd.sd_lobase = ldt_info.base_addr & 0xffffff;
592 sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
593 sd.sd_lolimit = ldt_info.limit & 0xffff;
594 sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
595 sd.sd_type = 16 | (ldt_info.contents << 2) |
596 (!ldt_info.read_exec_only << 1);
597 sd.sd_dpl = SEL_UPL;
598 sd.sd_p = !ldt_info.seg_not_present;
599 sd.sd_def32 = ldt_info.seg_32bit;
600 sd.sd_gran = ldt_info.limit_in_pages;
601 if (!oldmode)
602 sd.sd_xx = ldt_info.useable;
603 else
604 sd.sd_xx = 0;
605 }
606 sg = stackgap_init(p, 0);
607 sl.start = ldt_info.entry_number;
608 sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
609 sl.num = 1;
610
611 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
612 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
613
614 parms = stackgap_alloc(p, &sg, sizeof(sl));
615
616 if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
617 return (error);
618 if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
619 return (error);
620
621 if ((error = i386_set_ldt(l, parms, retval)) != 0)
622 return (error);
623
624 *retval = 0;
625 return (0);
626 }
627
628 #endif /* USER_LDT */
629
630 int
631 linux_sys_modify_ldt(l, v, retval)
632 struct lwp *l;
633 void *v;
634 register_t *retval;
635 {
636 struct linux_sys_modify_ldt_args /* {
637 syscallarg(int) func;
638 syscallarg(void *) ptr;
639 syscallarg(size_t) bytecount;
640 } */ *uap = v;
641
642 switch (SCARG(uap, func)) {
643 #ifdef USER_LDT
644 case 0:
645 return linux_read_ldt(l, uap, retval);
646 case 1:
647 retval[0] = 1;
648 return linux_write_ldt(l, uap, retval);
649 case 2:
650 #ifdef notyet
651 return (linux_read_default_ldt(l, uap, retval);
652 #else
653 return (ENOSYS);
654 #endif
655 case 0x11:
656 retval[0] = 0;
657 return linux_write_ldt(l, uap, retval);
658 #endif /* USER_LDT */
659
660 default:
661 return (ENOSYS);
662 }
663 }
664
/*
 * XXX Pathetic hack to make svgalib work.  This fakes the major device
 * number of an opened VT so that svgalib likes it: when `raw' is set and
 * `dev' belongs to the wsdisplay character device, the device number
 * becomes (LINUX_CONS_MAJOR, minor + 1).  Every other device number is
 * returned unchanged.
 * Should probably do it 'wrong the right way' and use a mapping
 * array for all major device numbers, and map linux_mknod too.
 */
dev_t
linux_fakedev(dev, raw)
	dev_t dev;
	int raw;
{
	if (!raw)
		return dev;

#if (NWSDISPLAY > 0)
	{
		extern const struct cdevsw wsdisplay_cdevsw;

		if (cdevsw_lookup(dev) == &wsdisplay_cdevsw)
			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
	}
#endif

	return dev;
}
686
#if (NWSDISPLAY > 0)
/*
 * Static keyboard tables returned by the LINUX_KDGKBENT ioctl.
 * That's not complete, but enough to get an X server running.
 *
 * Each table has NR_KEYS entries and is indexed by kbe.kb_index; the
 * table itself is chosen by kbe.kb_table through linux_keytabs[].
 * NOTE(review): the entry values look like Linux keymap codes (type in
 * the high byte, value in the low byte) -- confirm against the Linux
 * keyboard headers before editing them.
 */
#define NR_KEYS 128
static const u_short plain_map[NR_KEYS] = {
	0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036,
	0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009,
	0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
	0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73,
	0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b,
	0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76,
	0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c,
	0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
	0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307,
	0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a,
	0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
	0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
}, shift_map[NR_KEYS] = {
	0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e,
	0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009,
	0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49,
	0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53,
	0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a,
	0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56,
	0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c,
	0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e,
	0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307,
	0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a,
	0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
	0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116,
	0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
}, altgr_map[NR_KEYS] = {
	0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200,
	0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200,
	0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
	0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73,
	0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200,
	0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76,
	0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c,
	0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510,
	0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911,
	0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b,
	0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516,
	0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
	0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
}, ctrl_map[NR_KEYS] = {
	0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e,
	0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200,
	0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009,
	0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013,
	0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200,
	0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016,
	0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c,
	0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
	0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307,
	0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a,
	0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
	0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/*
 * Table selector for LINUX_KDGKBENT.  Note that altgr_map is listed
 * twice, in slots 2 and 3.
 */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
#endif
766
767 static struct biosdisk_info *
768 fd2biosinfo(p, fp)
769 struct proc *p;
770 struct file *fp;
771 {
772 struct vnode *vp;
773 const char *blkname;
774 char diskname[16];
775 int i;
776 struct nativedisk_info *nip;
777 struct disklist *dl = i386_alldisks;
778
779 if (fp->f_type != DTYPE_VNODE)
780 return NULL;
781 vp = (struct vnode *)fp->f_data;
782
783 if (vp->v_type != VBLK)
784 return NULL;
785
786 blkname = devsw_blk2name(major(vp->v_rdev));
787 snprintf(diskname, sizeof diskname, "%s%u", blkname,
788 DISKUNIT(vp->v_rdev));
789
790 for (i = 0; i < dl->dl_nnativedisks; i++) {
791 nip = &dl->dl_nativedisks[i];
792 if (strcmp(diskname, nip->ni_devname))
793 continue;
794 if (nip->ni_nmatches != 0)
795 return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
796 }
797
798 return NULL;
799 }
800
801
802 /*
803 * We come here in a last attempt to satisfy a Linux ioctl() call
804 */
805 int
806 linux_machdepioctl(l, v, retval)
807 struct lwp *l;
808 void *v;
809 register_t *retval;
810 {
811 struct linux_sys_ioctl_args /* {
812 syscallarg(int) fd;
813 syscallarg(u_long) com;
814 syscallarg(caddr_t) data;
815 } */ *uap = v;
816 struct sys_ioctl_args bia;
817 u_long com;
818 int error, error1;
819 #if (NWSDISPLAY > 0)
820 struct vt_mode lvt;
821 caddr_t bvtp, sg;
822 struct kbentry kbe;
823 #endif
824 struct linux_hd_geometry hdg;
825 struct linux_hd_big_geometry hdg_big;
826 struct biosdisk_info *bip;
827 struct filedesc *fdp;
828 struct file *fp;
829 int fd;
830 struct disklabel label, *labp;
831 struct partinfo partp;
832 int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
833 u_long start, biostotal, realtotal;
834 u_char heads, sectors;
835 u_int cylinders;
836 struct ioctl_pt pt;
837 struct proc *p = l->l_proc;
838
839 fd = SCARG(uap, fd);
840 SCARG(&bia, fd) = fd;
841 SCARG(&bia, data) = SCARG(uap, data);
842 com = SCARG(uap, com);
843
844 fdp = p->p_fd;
845
846 if ((fp = fd_getfile(fdp, fd)) == NULL)
847 return (EBADF);
848
849 FILE_USE(fp);
850
851 switch (com) {
852 #if (NWSDISPLAY > 0)
853 case LINUX_KDGKBMODE:
854 com = KDGKBMODE;
855 break;
856 case LINUX_KDSKBMODE:
857 com = KDSKBMODE;
858 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
859 SCARG(&bia, data) = (caddr_t)K_RAW;
860 break;
861 case LINUX_KIOCSOUND:
862 SCARG(&bia, data) =
863 (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
864 /* fall through */
865 case LINUX_KDMKTONE:
866 com = KDMKTONE;
867 break;
868 case LINUX_KDSETMODE:
869 com = KDSETMODE;
870 break;
871 case LINUX_KDGETMODE:
872 /* KD_* values are equal to the wscons numbers */
873 com = WSDISPLAYIO_GMODE;
874 break;
875 case LINUX_KDENABIO:
876 com = KDENABIO;
877 break;
878 case LINUX_KDDISABIO:
879 com = KDDISABIO;
880 break;
881 case LINUX_KDGETLED:
882 com = KDGETLED;
883 break;
884 case LINUX_KDSETLED:
885 com = KDSETLED;
886 break;
887 case LINUX_VT_OPENQRY:
888 com = VT_OPENQRY;
889 break;
890 case LINUX_VT_GETMODE:
891 SCARG(&bia, com) = VT_GETMODE;
892 /* XXX NJWLWP */
893 if ((error = sys_ioctl(curlwp, &bia, retval)))
894 goto out;
895 if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
896 sizeof (struct vt_mode))))
897 goto out;
898 lvt.relsig = native_to_linux_signo[lvt.relsig];
899 lvt.acqsig = native_to_linux_signo[lvt.acqsig];
900 lvt.frsig = native_to_linux_signo[lvt.frsig];
901 error = copyout((caddr_t)&lvt, SCARG(uap, data),
902 sizeof (struct vt_mode));
903 goto out;
904 case LINUX_VT_SETMODE:
905 com = VT_SETMODE;
906 if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
907 sizeof (struct vt_mode))))
908 goto out;
909 lvt.relsig = linux_to_native_signo[lvt.relsig];
910 lvt.acqsig = linux_to_native_signo[lvt.acqsig];
911 lvt.frsig = linux_to_native_signo[lvt.frsig];
912 sg = stackgap_init(p, 0);
913 bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
914 if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
915 goto out;
916 SCARG(&bia, data) = bvtp;
917 break;
918 case LINUX_VT_DISALLOCATE:
919 /* XXX should use WSDISPLAYIO_DELSCREEN */
920 error = 0;
921 goto out;
922 case LINUX_VT_RELDISP:
923 com = VT_RELDISP;
924 break;
925 case LINUX_VT_ACTIVATE:
926 com = VT_ACTIVATE;
927 break;
928 case LINUX_VT_WAITACTIVE:
929 com = VT_WAITACTIVE;
930 break;
931 case LINUX_VT_GETSTATE:
932 com = VT_GETSTATE;
933 break;
934 case LINUX_KDGKBTYPE:
935 /* This is what Linux does. */
936 error = subyte(SCARG(uap, data), KB_101);
937 goto out;
938 case LINUX_KDGKBENT:
939 /*
940 * The Linux KDGKBENT ioctl is different from the
941 * SYSV original. So we handle it in machdep code.
942 * XXX We should use keyboard mapping information
943 * from wsdisplay, but this would be expensive.
944 */
945 if ((error = copyin(SCARG(uap, data), &kbe,
946 sizeof(struct kbentry))))
947 goto out;
948 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
949 || kbe.kb_index >= NR_KEYS) {
950 error = EINVAL;
951 goto out;
952 }
953 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
954 error = copyout(&kbe, SCARG(uap, data),
955 sizeof(struct kbentry));
956 goto out;
957 #endif
958 case LINUX_HDIO_GETGEO:
959 case LINUX_HDIO_GETGEO_BIG:
960 /*
961 * Try to mimic Linux behaviour: return the BIOS geometry
962 * if possible (extending its # of cylinders if it's beyond
963 * the 1023 limit), fall back to the MI geometry (i.e.
964 * the real geometry) if not found, by returning an
965 * error. See common/linux_hdio.c
966 */
967 bip = fd2biosinfo(p, fp);
968 ioctlf = fp->f_ops->fo_ioctl;
969 error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, l);
970 error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, l);
971 if (error != 0 && error1 != 0) {
972 error = error1;
973 goto out;
974 }
975 labp = error != 0 ? &label : partp.disklab;
976 start = error1 != 0 ? partp.part->p_offset : 0;
977 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
978 && bip->bi_cyl != 0) {
979 heads = bip->bi_head;
980 sectors = bip->bi_sec;
981 cylinders = bip->bi_cyl;
982 biostotal = heads * sectors * cylinders;
983 realtotal = labp->d_ntracks * labp->d_nsectors *
984 labp->d_ncylinders;
985 if (realtotal > biostotal)
986 cylinders = realtotal / (heads * sectors);
987 } else {
988 heads = labp->d_ntracks;
989 cylinders = labp->d_ncylinders;
990 sectors = labp->d_nsectors;
991 }
992 if (com == LINUX_HDIO_GETGEO) {
993 hdg.start = start;
994 hdg.heads = heads;
995 hdg.cylinders = cylinders;
996 hdg.sectors = sectors;
997 error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
998 goto out;
999 } else {
1000 hdg_big.start = start;
1001 hdg_big.heads = heads;
1002 hdg_big.cylinders = cylinders;
1003 hdg_big.sectors = sectors;
1004 error = copyout(&hdg_big, SCARG(uap, data),
1005 sizeof hdg_big);
1006 goto out;
1007 }
1008
1009 default:
1010 /*
1011 * Unknown to us. If it's on a device, just pass it through
1012 * using PTIOCLINUX, the device itself might be able to
1013 * make some sense of it.
1014 * XXX hack: if the function returns EJUSTRETURN,
1015 * it has stuffed a sysctl return value in pt.data.
1016 */
1017 FILE_USE(fp);
1018 ioctlf = fp->f_ops->fo_ioctl;
1019 pt.com = SCARG(uap, com);
1020 pt.data = SCARG(uap, data);
1021 error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, l);
1022 FILE_UNUSE(fp, l);
1023 if (error == EJUSTRETURN) {
1024 retval[0] = (register_t)pt.data;
1025 error = 0;
1026 }
1027
1028 if (error == ENOTTY)
1029 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1030 com));
1031 goto out;
1032 }
1033 SCARG(&bia, com) = com;
1034 /* XXX NJWLWP */
1035 error = sys_ioctl(curlwp, &bia, retval);
1036 out:
1037 FILE_UNUSE(fp ,l);
1038 return error;
1039 }
1040
1041 /*
1042 * Set I/O permissions for a process. Just set the maximum level
1043 * right away (ignoring the argument), otherwise we would have
1044 * to rely on I/O permission maps, which are not implemented.
1045 */
1046 int
1047 linux_sys_iopl(l, v, retval)
1048 struct lwp *l;
1049 void *v;
1050 register_t *retval;
1051 {
1052 #if 0
1053 struct linux_sys_iopl_args /* {
1054 syscallarg(int) level;
1055 } */ *uap = v;
1056 #endif
1057 struct proc *p = l->l_proc;
1058 struct trapframe *fp = l->l_md.md_regs;
1059
1060 if (suser(p->p_ucred, &p->p_acflag) != 0)
1061 return EPERM;
1062 fp->tf_eflags |= PSL_IOPL;
1063 *retval = 0;
1064 return 0;
1065 }
1066
1067 /*
1068 * See above. If a root process tries to set access to an I/O port,
1069 * just let it have the whole range.
1070 */
1071 int
1072 linux_sys_ioperm(l, v, retval)
1073 struct lwp *l;
1074 void *v;
1075 register_t *retval;
1076 {
1077 struct linux_sys_ioperm_args /* {
1078 syscallarg(unsigned int) lo;
1079 syscallarg(unsigned int) hi;
1080 syscallarg(int) val;
1081 } */ *uap = v;
1082 struct proc *p = l->l_proc;
1083 struct trapframe *fp = l->l_md.md_regs;
1084
1085 if (suser(p->p_ucred, &p->p_acflag) != 0)
1086 return EPERM;
1087 if (SCARG(uap, val))
1088 fp->tf_eflags |= PSL_IOPL;
1089 *retval = 0;
1090 return 0;
1091 }
1092