linux_machdep.c revision 1.86 1 /* $NetBSD: linux_machdep.c,v 1.86 2003/02/27 16:04:15 yamt Exp $ */
2
3 /*-
4 * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Frank van der Linden.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.86 2003/02/27 16:04:15 yamt Exp $");
41
42 #if defined(_KERNEL_OPT)
43 #include "opt_vm86.h"
44 #include "opt_user_ldt.h"
45 #endif
46
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/signalvar.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/user.h>
53 #include <sys/buf.h>
54 #include <sys/reboot.h>
55 #include <sys/conf.h>
56 #include <sys/exec.h>
57 #include <sys/file.h>
58 #include <sys/callout.h>
59 #include <sys/malloc.h>
60 #include <sys/mbuf.h>
61 #include <sys/msgbuf.h>
62 #include <sys/mount.h>
63 #include <sys/vnode.h>
64 #include <sys/device.h>
65 #include <sys/sa.h>
66 #include <sys/syscallargs.h>
67 #include <sys/filedesc.h>
68 #include <sys/exec_elf.h>
69 #include <sys/disklabel.h>
70 #include <sys/ioctl.h>
71 #include <miscfs/specfs/specdev.h>
72
73 #include <compat/linux/common/linux_types.h>
74 #include <compat/linux/common/linux_signal.h>
75 #include <compat/linux/common/linux_util.h>
76 #include <compat/linux/common/linux_ioctl.h>
77 #include <compat/linux/common/linux_hdio.h>
78 #include <compat/linux/common/linux_exec.h>
79 #include <compat/linux/common/linux_machdep.h>
80
81 #include <compat/linux/linux_syscallargs.h>
82
83 #include <machine/cpu.h>
84 #include <machine/cpufunc.h>
85 #include <machine/psl.h>
86 #include <machine/reg.h>
87 #include <machine/segments.h>
88 #include <machine/specialreg.h>
89 #include <machine/sysarch.h>
90 #include <machine/vm86.h>
91 #include <machine/vmparam.h>
92
93 /*
94 * To see whether wscons is configured (for virtual console ioctl calls).
95 */
96 #if defined(_KERNEL_OPT)
97 #include "wsdisplay.h"
98 #endif
99 #if (NWSDISPLAY > 0)
100 #include <dev/wscons/wsconsio.h>
101 #include <dev/wscons/wsdisplay_usl_io.h>
102 #if defined(_KERNEL_OPT)
103 #include "opt_xserver.h"
104 #endif
105 #endif
106
107 #ifdef USER_LDT
108 #include <machine/cpu.h>
109 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
110 register_t *));
111 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
112 register_t *));
113 #endif
114
/*
 * Debug tracing.  DPRINTF takes one fully parenthesized uprintf() argument
 * list, e.g. DPRINTF(("x = %d\n", x)); and prints only when DEBUG_LINUX is
 * defined.  Both variants expand to exactly one statement, so the macro can
 * safely be the sole body of an if/else without leaving an empty statement
 * or capturing a following else.
 */
#ifdef DEBUG_LINUX
#define DPRINTF(a)	do { uprintf a; } while (/* CONSTCOND */ 0)
#else
#define DPRINTF(a)	do { } while (/* CONSTCOND */ 0)
#endif
120
121 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
122 extern struct disklist *i386_alldisks;
123 static void linux_savecontext __P((struct lwp *, struct trapframe *,
124 sigset_t *, struct linux_sigcontext *));
125 static void linux_rt_sendsig __P((int, sigset_t *, u_long));
126 static void linux_old_sendsig __P((int, sigset_t *, u_long));
127
128 extern char linux_sigcode[], linux_rt_sigcode[];
129 /*
130 * Deal with some i386-specific things in the Linux emulation code.
131 */
132
133 void
134 linux_setregs(l, epp, stack)
135 struct lwp *l;
136 struct exec_package *epp;
137 u_long stack;
138 {
139 struct pcb *pcb = &l->l_addr->u_pcb;
140 struct trapframe *tf;
141
142 #if NNPX > 0
143 /* If we were using the FPU, forget about it. */
144 if (npxproc == l)
145 npxdrop();
146 #endif
147
148 #ifdef USER_LDT
149 pmap_ldt_cleanup(l);
150 #endif
151
152 l->l_md.md_flags &= ~MDP_USEDFPU;
153
154 if (i386_use_fxsave) {
155 pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
156 pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
157 } else
158 pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
159
160 tf = l->l_md.md_regs;
161 tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
162 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
163 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
164 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
165 tf->tf_edi = 0;
166 tf->tf_esi = 0;
167 tf->tf_ebp = 0;
168 tf->tf_ebx = (int)l->l_proc->p_psstr;
169 tf->tf_edx = 0;
170 tf->tf_ecx = 0;
171 tf->tf_eax = 0;
172 tf->tf_eip = epp->ep_entry;
173 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
174 tf->tf_eflags = PSL_USERSET;
175 tf->tf_esp = stack;
176 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
177 }
178
179 /*
180 * Send an interrupt to process.
181 *
182 * Stack is set up to allow sigcode stored
183 * in u. to call routine, followed by kcall
184 * to sigreturn routine below. After sigreturn
185 * resets the signal mask, the stack, and the
186 * frame pointer, it returns to the user
187 * specified pc, psl.
188 */
189
190 void
191 linux_sendsig(sig, mask, code)
192 int sig;
193 sigset_t *mask;
194 u_long code;
195 {
196 if (SIGACTION(curproc, sig).sa_flags & SA_SIGINFO)
197 linux_rt_sendsig(sig, mask, code);
198 else
199 linux_old_sendsig(sig, mask, code);
200 }
201
202
203 static void
204 linux_savecontext(l, tf, mask, sc)
205 struct lwp *l;
206 struct trapframe *tf;
207 sigset_t *mask;
208 struct linux_sigcontext *sc;
209 {
210 /* Save register context. */
211 #ifdef VM86
212 if (tf->tf_eflags & PSL_VM) {
213 sc->sc_gs = tf->tf_vm86_gs;
214 sc->sc_fs = tf->tf_vm86_fs;
215 sc->sc_es = tf->tf_vm86_es;
216 sc->sc_ds = tf->tf_vm86_ds;
217 sc->sc_eflags = get_vflags(l);
218 } else
219 #endif
220 {
221 sc->sc_gs = tf->tf_gs;
222 sc->sc_fs = tf->tf_fs;
223 sc->sc_es = tf->tf_es;
224 sc->sc_ds = tf->tf_ds;
225 sc->sc_eflags = tf->tf_eflags;
226 }
227 sc->sc_edi = tf->tf_edi;
228 sc->sc_esi = tf->tf_esi;
229 sc->sc_esp = tf->tf_esp;
230 sc->sc_ebp = tf->tf_ebp;
231 sc->sc_ebx = tf->tf_ebx;
232 sc->sc_edx = tf->tf_edx;
233 sc->sc_ecx = tf->tf_ecx;
234 sc->sc_eax = tf->tf_eax;
235 sc->sc_eip = tf->tf_eip;
236 sc->sc_cs = tf->tf_cs;
237 sc->sc_esp_at_signal = tf->tf_esp;
238 sc->sc_ss = tf->tf_ss;
239 sc->sc_err = tf->tf_err;
240 sc->sc_trapno = tf->tf_trapno;
241 sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
242 sc->sc_387 = NULL;
243
244 /* Save signal stack. */
245 /* Linux doesn't save the onstack flag in sigframe */
246
247 /* Save signal mask. */
248 native_to_linux_old_sigset(&sc->sc_mask, mask);
249 }
250
251 static void
252 linux_rt_sendsig(sig, mask, code)
253 int sig;
254 sigset_t *mask;
255 u_long code;
256 {
257 struct lwp *l = curlwp;
258 struct proc *p = l->l_proc;
259 struct trapframe *tf;
260 struct linux_rt_sigframe *fp, frame;
261 int onstack;
262 sig_t catcher = SIGACTION(p, sig).sa_handler;
263 struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
264
265 tf = l->l_md.md_regs;
266 /* Do we need to jump onto the signal stack? */
267 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
268 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
269
270
271 /* Allocate space for the signal handler context. */
272 if (onstack)
273 fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
274 sas->ss_size);
275 else
276 fp = (struct linux_rt_sigframe *)tf->tf_esp;
277 fp--;
278
279 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x\n", onstack, fp,
280 sig, tf->tf_eip));
281
282 /* Build stack frame for signal trampoline. */
283 frame.sf_handler = catcher;
284 frame.sf_sig = native_to_linux_signo[sig];
285 frame.sf_sip = &fp->sf_si;
286 frame.sf_scp = &fp->sf_sc;
287
288 /*
289 * XXX: zero siginfo out until we provide more info.
290 */
291 (void)memset(&frame.sf_si, 0, sizeof(frame.sf_si));
292
293 /* Save register context. */
294 linux_savecontext(l, tf, mask, &frame.sf_sc);
295
296 if (copyout(&frame, fp, sizeof(frame)) != 0) {
297 /*
298 * Process has trashed its stack; give it an illegal
299 * instruction to halt it in its tracks.
300 */
301 sigexit(l, SIGILL);
302 /* NOTREACHED */
303 }
304
305 /*
306 * Build context to run handler in.
307 */
308 tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
309 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
310 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
311 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
312 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
313 (linux_rt_sigcode - linux_sigcode);
314 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
315 tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
316 tf->tf_esp = (int)fp;
317 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
318
319 /* Remember that we're now on the signal stack. */
320 if (onstack)
321 sas->ss_flags |= SS_ONSTACK;
322 }
323
324 static void
325 linux_old_sendsig(sig, mask, code)
326 int sig;
327 sigset_t *mask;
328 u_long code;
329 {
330 struct lwp *l = curlwp;
331 struct proc *p = l->l_proc;
332 struct trapframe *tf;
333 struct linux_sigframe *fp, frame;
334 int onstack;
335 sig_t catcher = SIGACTION(p, sig).sa_handler;
336 struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
337
338 tf = l->l_md.md_regs;
339
340 /* Do we need to jump onto the signal stack? */
341 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
342 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
343
344 /* Allocate space for the signal handler context. */
345 if (onstack)
346 fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
347 sas->ss_size);
348 else
349 fp = (struct linux_sigframe *)tf->tf_esp;
350 fp--;
351
352 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x\n",
353 onstack, fp, sig, tf->tf_eip));
354
355 /* Build stack frame for signal trampoline. */
356 frame.sf_handler = catcher;
357 frame.sf_sig = native_to_linux_signo[sig];
358
359 linux_savecontext(l, tf, mask, &frame.sf_sc);
360
361 if (copyout(&frame, fp, sizeof(frame)) != 0) {
362 /*
363 * Process has trashed its stack; give it an illegal
364 * instruction to halt it in its tracks.
365 */
366 sigexit(l, SIGILL);
367 /* NOTREACHED */
368 }
369
370 /*
371 * Build context to run handler in.
372 */
373 tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
374 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
375 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
376 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
377 tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
378 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
379 tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
380 tf->tf_esp = (int)fp;
381 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
382
383 /* Remember that we're now on the signal stack. */
384 if (onstack)
385 sas->ss_flags |= SS_ONSTACK;
386 }
387
388 /*
389 * System call to cleanup state after a signal
390 * has been taken. Reset signal mask and
391 * stack state from context left by sendsig (above).
392 * Return to previous pc and psl as specified by
393 * context left by sendsig. Check carefully to
394 * make sure that the user has not modified the
395 * psl to gain improper privileges or to cause
396 * a machine fault.
397 */
398 int
399 linux_sys_rt_sigreturn(l, v, retval)
400 struct lwp *l;
401 void *v;
402 register_t *retval;
403 {
404 /* XXX XAX write me */
405 return(ENOSYS);
406 }
407
408 int
409 linux_sys_sigreturn(l, v, retval)
410 struct lwp *l;
411 void *v;
412 register_t *retval;
413 {
414 struct linux_sys_sigreturn_args /* {
415 syscallarg(struct linux_sigcontext *) scp;
416 } */ *uap = v;
417 struct proc *p = l->l_proc;
418 struct linux_sigcontext *scp, context;
419 struct trapframe *tf;
420 sigset_t mask;
421 ssize_t ss_gap;
422 struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
423
424 /*
425 * The trampoline code hands us the context.
426 * It is unsafe to keep track of it ourselves, in the event that a
427 * program jumps out of a signal handler.
428 */
429 scp = SCARG(uap, scp);
430 if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
431 return EFAULT;
432
433 /* Restore register context. */
434 tf = l->l_md.md_regs;
435
436 DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
437 #ifdef VM86
438 if (context.sc_eflags & PSL_VM) {
439 void syscall_vm86 __P((struct trapframe));
440
441 tf->tf_vm86_gs = context.sc_gs;
442 tf->tf_vm86_fs = context.sc_fs;
443 tf->tf_vm86_es = context.sc_es;
444 tf->tf_vm86_ds = context.sc_ds;
445 set_vflags(l, context.sc_eflags);
446 p->p_md.md_syscall = syscall_vm86;
447 } else
448 #endif
449 {
450 /*
451 * Check for security violations. If we're returning to
452 * protected mode, the CPU will validate the segment registers
453 * automatically and generate a trap on violations. We handle
454 * the trap, rather than doing all of the checking here.
455 */
456 if (((context.sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
457 !USERMODE(context.sc_cs, context.sc_eflags))
458 return EINVAL;
459
460 tf->tf_gs = context.sc_gs;
461 tf->tf_fs = context.sc_fs;
462 tf->tf_es = context.sc_es;
463 tf->tf_ds = context.sc_ds;
464 #ifdef VM86
465 if (tf->tf_eflags & PSL_VM)
466 (*p->p_emul->e_syscall_intern)(p);
467 #endif
468 tf->tf_eflags = context.sc_eflags;
469 }
470 tf->tf_edi = context.sc_edi;
471 tf->tf_esi = context.sc_esi;
472 tf->tf_ebp = context.sc_ebp;
473 tf->tf_ebx = context.sc_ebx;
474 tf->tf_edx = context.sc_edx;
475 tf->tf_ecx = context.sc_ecx;
476 tf->tf_eax = context.sc_eax;
477 tf->tf_eip = context.sc_eip;
478 tf->tf_cs = context.sc_cs;
479 tf->tf_esp = context.sc_esp_at_signal;
480 tf->tf_ss = context.sc_ss;
481
482 /* Restore signal stack. */
483 /*
484 * Linux really does it this way; it doesn't have space in sigframe
485 * to save the onstack flag.
486 */
487 ss_gap = (ssize_t)
488 ((caddr_t) context.sc_esp_at_signal - (caddr_t) sas->ss_sp);
489 if (ss_gap >= 0 && ss_gap < sas->ss_size)
490 sas->ss_flags |= SS_ONSTACK;
491 else
492 sas->ss_flags &= ~SS_ONSTACK;
493
494 /* Restore signal mask. */
495 linux_old_to_native_sigset(&mask, &context.sc_mask);
496 (void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
497 DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
498 return EJUSTRETURN;
499 }
500
501 #ifdef USER_LDT
502
503 int
504 linux_read_ldt(l, uap, retval)
505 struct lwp *l;
506 struct linux_sys_modify_ldt_args /* {
507 syscallarg(int) func;
508 syscallarg(void *) ptr;
509 syscallarg(size_t) bytecount;
510 } */ *uap;
511 register_t *retval;
512 {
513 struct proc *p = l->l_proc;
514 struct i386_get_ldt_args gl;
515 int error;
516 caddr_t sg;
517 char *parms;
518
519 DPRINTF(("linux_read_ldt!"));
520 sg = stackgap_init(p, 0);
521
522 gl.start = 0;
523 gl.desc = SCARG(uap, ptr);
524 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
525
526 parms = stackgap_alloc(p, &sg, sizeof(gl));
527
528 if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
529 return (error);
530
531 if ((error = i386_get_ldt(l, parms, retval)) != 0)
532 return (error);
533
534 *retval *= sizeof(union descriptor);
535 return (0);
536 }
537
/*
 * Argument block of the modify_ldt "write" operations, as copied in by
 * linux_write_ldt().  The per-field notes describe how linux_write_ldt()
 * uses each member.
 */
struct linux_ldt_info {
	unsigned int	entry_number;		/* LDT slot to set */
	unsigned long	base_addr;		/* segment base */
	unsigned int	limit;			/* segment limit (low 20 bits used) */
	unsigned int	seg_32bit:1;		/* copied to sd_def32 */
	unsigned int	contents:2;		/* folded into sd_type */
	unsigned int	read_exec_only:1;	/* inverted into sd_type */
	unsigned int	limit_in_pages:1;	/* copied to sd_gran */
	unsigned int	seg_not_present:1;	/* inverted into sd_p */
	unsigned int	useable:1;		/* copied to sd_xx (new mode) */
};
549
550 int
551 linux_write_ldt(l, uap, retval)
552 struct lwp *l;
553 struct linux_sys_modify_ldt_args /* {
554 syscallarg(int) func;
555 syscallarg(void *) ptr;
556 syscallarg(size_t) bytecount;
557 } */ *uap;
558 register_t *retval;
559 {
560 struct proc *p = l->l_proc;
561 struct linux_ldt_info ldt_info;
562 struct segment_descriptor sd;
563 struct i386_set_ldt_args sl;
564 int error;
565 caddr_t sg;
566 char *parms;
567 int oldmode = (int)retval[0];
568
569 DPRINTF(("linux_write_ldt %d\n", oldmode));
570 if (SCARG(uap, bytecount) != sizeof(ldt_info))
571 return (EINVAL);
572 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
573 return error;
574 if (ldt_info.entry_number >= 8192)
575 return (EINVAL);
576 if (ldt_info.contents == 3) {
577 if (oldmode)
578 return (EINVAL);
579 if (ldt_info.seg_not_present)
580 return (EINVAL);
581 }
582
583 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
584 (oldmode || (ldt_info.contents == 0 &&
585 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
586 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
587 ldt_info.useable == 0))) {
588 /* this means you should zero the ldt */
589 (void)memset(&sd, 0, sizeof(sd));
590 } else {
591 sd.sd_lobase = ldt_info.base_addr & 0xffffff;
592 sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
593 sd.sd_lolimit = ldt_info.limit & 0xffff;
594 sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
595 sd.sd_type = 16 | (ldt_info.contents << 2) |
596 (!ldt_info.read_exec_only << 1);
597 sd.sd_dpl = SEL_UPL;
598 sd.sd_p = !ldt_info.seg_not_present;
599 sd.sd_def32 = ldt_info.seg_32bit;
600 sd.sd_gran = ldt_info.limit_in_pages;
601 if (!oldmode)
602 sd.sd_xx = ldt_info.useable;
603 else
604 sd.sd_xx = 0;
605 }
606 sg = stackgap_init(p, 0);
607 sl.start = ldt_info.entry_number;
608 sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
609 sl.num = 1;
610
611 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
612 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
613
614 parms = stackgap_alloc(p, &sg, sizeof(sl));
615
616 if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
617 return (error);
618 if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
619 return (error);
620
621 if ((error = i386_set_ldt(l, parms, retval)) != 0)
622 return (error);
623
624 *retval = 0;
625 return (0);
626 }
627
628 #endif /* USER_LDT */
629
630 int
631 linux_sys_modify_ldt(l, v, retval)
632 struct lwp *l;
633 void *v;
634 register_t *retval;
635 {
636 struct linux_sys_modify_ldt_args /* {
637 syscallarg(int) func;
638 syscallarg(void *) ptr;
639 syscallarg(size_t) bytecount;
640 } */ *uap = v;
641
642 switch (SCARG(uap, func)) {
643 #ifdef USER_LDT
644 case 0:
645 return linux_read_ldt(l, uap, retval);
646 case 1:
647 retval[0] = 1;
648 return linux_write_ldt(l, uap, retval);
649 case 2:
650 #ifdef notyet
651 return (linux_read_default_ldt(l, uap, retval);
652 #else
653 return (ENOSYS);
654 #endif
655 case 0x11:
656 retval[0] = 0;
657 return linux_write_ldt(l, uap, retval);
658 #endif /* USER_LDT */
659
660 default:
661 return (ENOSYS);
662 }
663 }
664
/*
 * XXX Pathetic hack to make svgalib work.  This fakes the major device
 * number of an opened VT so that svgalib likes it.  grmbl.
 * Should probably do it 'wrong the right way' and use a mapping
 * array for all major device numbers, and map linux_mknod too.
 *
 * When `raw' is non-zero and wscons is configured, a device served by the
 * wsdisplay character driver is reported with the Linux console major and
 * its minor number incremented by one.  Every other device number is
 * returned unchanged.
 */
dev_t
linux_fakedev(dev, raw)
	dev_t dev;
	int raw;
{
#if (NWSDISPLAY > 0)
	extern const struct cdevsw wsdisplay_cdevsw;

	if (raw && cdevsw_lookup(dev) == &wsdisplay_cdevsw)
		return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
#endif

	return dev;
}
686
687 #if (NWSDISPLAY > 0)
/*
 * Fixed keyboard translation tables handed out by the KDGKBENT ioctl.
 * That's not complete, but enough to get an X server running.
 *
 * Each table has NR_KEYS entries and is indexed by key number; the
 * leading comment on every row gives the index of its first entry.
 */
#define NR_KEYS 128

/* No modifier. */
static const u_short plain_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036,
	/* 0x08 */ 0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009,
	/* 0x10 */ 0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
	/* 0x18 */ 0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73,
	/* 0x20 */ 0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b,
	/* 0x28 */ 0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76,
	/* 0x30 */ 0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
	/* 0x40 */ 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307,
	/* 0x48 */ 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	/* 0x50 */ 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a,
	/* 0x58 */ 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/* Shift. */
static const u_short shift_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e,
	/* 0x08 */ 0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009,
	/* 0x10 */ 0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49,
	/* 0x18 */ 0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53,
	/* 0x20 */ 0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a,
	/* 0x28 */ 0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56,
	/* 0x30 */ 0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e,
	/* 0x40 */ 0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307,
	/* 0x48 */ 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	/* 0x50 */ 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a,
	/* 0x58 */ 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/* AltGr. */
static const u_short altgr_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200,
	/* 0x08 */ 0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200,
	/* 0x10 */ 0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
	/* 0x18 */ 0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73,
	/* 0x20 */ 0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200,
	/* 0x28 */ 0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76,
	/* 0x30 */ 0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510,
	/* 0x40 */ 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911,
	/* 0x48 */ 0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b,
	/* 0x50 */ 0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516,
	/* 0x58 */ 0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/* Control. */
static const u_short ctrl_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e,
	/* 0x08 */ 0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200,
	/* 0x10 */ 0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009,
	/* 0x18 */ 0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013,
	/* 0x20 */ 0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200,
	/* 0x28 */ 0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016,
	/* 0x30 */ 0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
	/* 0x40 */ 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307,
	/* 0x48 */ 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	/* 0x50 */ 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a,
	/* 0x58 */ 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};
761
762 const u_short * const linux_keytabs[] = {
763 plain_map, shift_map, altgr_map, altgr_map, ctrl_map
764 };
765 #endif
766
767 static struct biosdisk_info *
768 fd2biosinfo(p, fp)
769 struct proc *p;
770 struct file *fp;
771 {
772 struct vnode *vp;
773 const char *blkname;
774 char diskname[16];
775 int i;
776 struct nativedisk_info *nip;
777 struct disklist *dl = i386_alldisks;
778
779 if (fp->f_type != DTYPE_VNODE)
780 return NULL;
781 vp = (struct vnode *)fp->f_data;
782
783 if (vp->v_type != VBLK)
784 return NULL;
785
786 blkname = devsw_blk2name(major(vp->v_rdev));
787 snprintf(diskname, sizeof diskname, "%s%u", blkname,
788 DISKUNIT(vp->v_rdev));
789
790 for (i = 0; i < dl->dl_nnativedisks; i++) {
791 nip = &dl->dl_nativedisks[i];
792 if (strcmp(diskname, nip->ni_devname))
793 continue;
794 if (nip->ni_nmatches != 0)
795 return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
796 }
797
798 return NULL;
799 }
800
801
802 /*
803 * We come here in a last attempt to satisfy a Linux ioctl() call
804 */
805 int
806 linux_machdepioctl(p, v, retval)
807 struct proc *p;
808 void *v;
809 register_t *retval;
810 {
811 struct linux_sys_ioctl_args /* {
812 syscallarg(int) fd;
813 syscallarg(u_long) com;
814 syscallarg(caddr_t) data;
815 } */ *uap = v;
816 struct sys_ioctl_args bia;
817 u_long com;
818 int error, error1;
819 #if (NWSDISPLAY > 0)
820 struct vt_mode lvt;
821 caddr_t bvtp, sg;
822 struct kbentry kbe;
823 #endif
824 struct linux_hd_geometry hdg;
825 struct linux_hd_big_geometry hdg_big;
826 struct biosdisk_info *bip;
827 struct filedesc *fdp;
828 struct file *fp;
829 int fd;
830 struct disklabel label, *labp;
831 struct partinfo partp;
832 int (*ioctlf) __P((struct file *, u_long, caddr_t, struct proc *));
833 u_long start, biostotal, realtotal;
834 u_char heads, sectors;
835 u_int cylinders;
836 struct ioctl_pt pt;
837
838 fd = SCARG(uap, fd);
839 SCARG(&bia, fd) = fd;
840 SCARG(&bia, data) = SCARG(uap, data);
841 com = SCARG(uap, com);
842
843 fdp = p->p_fd;
844
845 if ((fp = fd_getfile(fdp, fd)) == NULL)
846 return (EBADF);
847
848 FILE_USE(fp);
849
850 switch (com) {
851 #if (NWSDISPLAY > 0)
852 case LINUX_KDGKBMODE:
853 com = KDGKBMODE;
854 break;
855 case LINUX_KDSKBMODE:
856 com = KDSKBMODE;
857 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
858 SCARG(&bia, data) = (caddr_t)K_RAW;
859 break;
860 case LINUX_KIOCSOUND:
861 SCARG(&bia, data) =
862 (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
863 /* fall through */
864 case LINUX_KDMKTONE:
865 com = KDMKTONE;
866 break;
867 case LINUX_KDSETMODE:
868 com = KDSETMODE;
869 break;
870 case LINUX_KDGETMODE:
871 /* KD_* values are equal to the wscons numbers */
872 com = WSDISPLAYIO_GMODE;
873 break;
874 case LINUX_KDENABIO:
875 com = KDENABIO;
876 break;
877 case LINUX_KDDISABIO:
878 com = KDDISABIO;
879 break;
880 case LINUX_KDGETLED:
881 com = KDGETLED;
882 break;
883 case LINUX_KDSETLED:
884 com = KDSETLED;
885 break;
886 case LINUX_VT_OPENQRY:
887 com = VT_OPENQRY;
888 break;
889 case LINUX_VT_GETMODE:
890 SCARG(&bia, com) = VT_GETMODE;
891 /* XXX NJWLWP */
892 if ((error = sys_ioctl(curlwp, &bia, retval)))
893 goto out;
894 if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
895 sizeof (struct vt_mode))))
896 goto out;
897 lvt.relsig = native_to_linux_signo[lvt.relsig];
898 lvt.acqsig = native_to_linux_signo[lvt.acqsig];
899 lvt.frsig = native_to_linux_signo[lvt.frsig];
900 error = copyout((caddr_t)&lvt, SCARG(uap, data),
901 sizeof (struct vt_mode));
902 goto out;
903 case LINUX_VT_SETMODE:
904 com = VT_SETMODE;
905 if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
906 sizeof (struct vt_mode))))
907 goto out;
908 lvt.relsig = linux_to_native_signo[lvt.relsig];
909 lvt.acqsig = linux_to_native_signo[lvt.acqsig];
910 lvt.frsig = linux_to_native_signo[lvt.frsig];
911 sg = stackgap_init(p, 0);
912 bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
913 if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
914 goto out;
915 SCARG(&bia, data) = bvtp;
916 break;
917 case LINUX_VT_DISALLOCATE:
918 /* XXX should use WSDISPLAYIO_DELSCREEN */
919 error = 0;
920 goto out;
921 case LINUX_VT_RELDISP:
922 com = VT_RELDISP;
923 break;
924 case LINUX_VT_ACTIVATE:
925 com = VT_ACTIVATE;
926 break;
927 case LINUX_VT_WAITACTIVE:
928 com = VT_WAITACTIVE;
929 break;
930 case LINUX_VT_GETSTATE:
931 com = VT_GETSTATE;
932 break;
933 case LINUX_KDGKBTYPE:
934 /* This is what Linux does. */
935 error = subyte(SCARG(uap, data), KB_101);
936 goto out;
937 case LINUX_KDGKBENT:
938 /*
939 * The Linux KDGKBENT ioctl is different from the
940 * SYSV original. So we handle it in machdep code.
941 * XXX We should use keyboard mapping information
942 * from wsdisplay, but this would be expensive.
943 */
944 if ((error = copyin(SCARG(uap, data), &kbe,
945 sizeof(struct kbentry))))
946 goto out;
947 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
948 || kbe.kb_index >= NR_KEYS) {
949 error = EINVAL;
950 goto out;
951 }
952 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
953 error = copyout(&kbe, SCARG(uap, data),
954 sizeof(struct kbentry));
955 goto out;
956 #endif
957 case LINUX_HDIO_GETGEO:
958 case LINUX_HDIO_GETGEO_BIG:
959 /*
960 * Try to mimic Linux behaviour: return the BIOS geometry
961 * if possible (extending its # of cylinders if it's beyond
962 * the 1023 limit), fall back to the MI geometry (i.e.
963 * the real geometry) if not found, by returning an
964 * error. See common/linux_hdio.c
965 */
966 bip = fd2biosinfo(p, fp);
967 ioctlf = fp->f_ops->fo_ioctl;
968 error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
969 error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
970 if (error != 0 && error1 != 0) {
971 error = error1;
972 goto out;
973 }
974 labp = error != 0 ? &label : partp.disklab;
975 start = error1 != 0 ? partp.part->p_offset : 0;
976 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
977 && bip->bi_cyl != 0) {
978 heads = bip->bi_head;
979 sectors = bip->bi_sec;
980 cylinders = bip->bi_cyl;
981 biostotal = heads * sectors * cylinders;
982 realtotal = labp->d_ntracks * labp->d_nsectors *
983 labp->d_ncylinders;
984 if (realtotal > biostotal)
985 cylinders = realtotal / (heads * sectors);
986 } else {
987 heads = labp->d_ntracks;
988 cylinders = labp->d_ncylinders;
989 sectors = labp->d_nsectors;
990 }
991 if (com == LINUX_HDIO_GETGEO) {
992 hdg.start = start;
993 hdg.heads = heads;
994 hdg.cylinders = cylinders;
995 hdg.sectors = sectors;
996 error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
997 goto out;
998 } else {
999 hdg_big.start = start;
1000 hdg_big.heads = heads;
1001 hdg_big.cylinders = cylinders;
1002 hdg_big.sectors = sectors;
1003 error = copyout(&hdg_big, SCARG(uap, data),
1004 sizeof hdg_big);
1005 goto out;
1006 }
1007
1008 default:
1009 /*
1010 * Unknown to us. If it's on a device, just pass it through
1011 * using PTIOCLINUX, the device itself might be able to
1012 * make some sense of it.
1013 * XXX hack: if the function returns EJUSTRETURN,
1014 * it has stuffed a sysctl return value in pt.data.
1015 */
1016 FILE_USE(fp);
1017 ioctlf = fp->f_ops->fo_ioctl;
1018 pt.com = SCARG(uap, com);
1019 pt.data = SCARG(uap, data);
1020 error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
1021 FILE_UNUSE(fp, p);
1022 if (error == EJUSTRETURN) {
1023 retval[0] = (register_t)pt.data;
1024 error = 0;
1025 }
1026
1027 if (error == ENOTTY)
1028 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1029 com));
1030 goto out;
1031 }
1032 SCARG(&bia, com) = com;
1033 /* XXX NJWLWP */
1034 error = sys_ioctl(curlwp, &bia, retval);
1035 out:
1036 FILE_UNUSE(fp ,p);
1037 return error;
1038 }
1039
1040 /*
1041 * Set I/O permissions for a process. Just set the maximum level
1042 * right away (ignoring the argument), otherwise we would have
1043 * to rely on I/O permission maps, which are not implemented.
1044 */
1045 int
1046 linux_sys_iopl(l, v, retval)
1047 struct lwp *l;
1048 void *v;
1049 register_t *retval;
1050 {
1051 #if 0
1052 struct linux_sys_iopl_args /* {
1053 syscallarg(int) level;
1054 } */ *uap = v;
1055 #endif
1056 struct proc *p = l->l_proc;
1057 struct trapframe *fp = l->l_md.md_regs;
1058
1059 if (suser(p->p_ucred, &p->p_acflag) != 0)
1060 return EPERM;
1061 fp->tf_eflags |= PSL_IOPL;
1062 *retval = 0;
1063 return 0;
1064 }
1065
1066 /*
1067 * See above. If a root process tries to set access to an I/O port,
1068 * just let it have the whole range.
1069 */
1070 int
1071 linux_sys_ioperm(l, v, retval)
1072 struct lwp *l;
1073 void *v;
1074 register_t *retval;
1075 {
1076 struct linux_sys_ioperm_args /* {
1077 syscallarg(unsigned int) lo;
1078 syscallarg(unsigned int) hi;
1079 syscallarg(int) val;
1080 } */ *uap = v;
1081 struct proc *p = l->l_proc;
1082 struct trapframe *fp = l->l_md.md_regs;
1083
1084 if (suser(p->p_ucred, &p->p_acflag) != 0)
1085 return EPERM;
1086 if (SCARG(uap, val))
1087 fp->tf_eflags |= PSL_IOPL;
1088 *retval = 0;
1089 return 0;
1090 }
1091