linux_machdep.c revision 1.3 1 /* $NetBSD: linux_machdep.c,v 1.3 2005/05/19 21:16:29 manu Exp $ */
2
3 /*-
4 * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. All advertising materials mentioning features or use of this software
15 * must display the following acknowledgement:
16 * This product includes software developed by Emmanuel Dreyfus
17 * 4. The name of the author may not be used to endorse or promote
18 * products derived from this software without specific prior written
19 * permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS''
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include <sys/cdefs.h>
35
36 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.3 2005/05/19 21:16:29 manu Exp $");
37
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/systm.h>
41 #include <sys/signal.h>
42 #include <sys/exec.h>
43 #include <sys/proc.h>
44 #include <sys/ptrace.h> /* for process_read_fpregs() */
45 #include <sys/user.h>
46 #include <sys/ucontext.h>
47
48 #include <machine/reg.h>
49 #include <machine/pcb.h>
50 #include <machine/fpu.h>
51 #include <machine/mcontext.h>
52 #include <machine/specialreg.h>
53 #include <machine/vmparam.h>
54
55 #include <compat/linux/common/linux_signal.h>
56 #include <compat/linux/common/linux_errno.h>
57 #include <compat/linux/common/linux_exec.h>
58 #include <compat/linux/common/linux_ioctl.h>
59 #include <compat/linux/common/linux_prctl.h>
60 #include <compat/linux/common/linux_machdep.h>
61 #include <compat/linux/linux_syscallargs.h>
62
63
64 void
65 linux_setregs(l, epp, stack)
66 struct lwp *l;
67 struct exec_package *epp;
68 u_long stack;
69 {
70 struct pcb *pcb = &l->l_addr->u_pcb;
71 struct trapframe *tf;
72
73 /* If we were using the FPU, forget about it. */
74 if (l->l_addr->u_pcb.pcb_fpcpu != NULL)
75 fpusave_lwp(l, 0);
76
77 l->l_md.md_flags &= ~MDP_USEDFPU;
78 pcb->pcb_flags = 0;
79 pcb->pcb_savefpu.fp_fxsave.fx_fcw = __NetBSD_NPXCW__;
80 pcb->pcb_savefpu.fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
81 pcb->pcb_savefpu.fp_fxsave.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
82
83 l->l_proc->p_flag &= ~P_32;
84
85 printf("stack = 0x%lx, entry = 0x%lx\n", stack, epp->ep_entry);
86 tf = l->l_md.md_regs;
87 tf->tf_rax = 0;
88 tf->tf_rbx = 0;
89 tf->tf_rcx = epp->ep_entry;
90 tf->tf_rdx = 0;
91 tf->tf_rsi = 0;
92 tf->tf_rdi = 0;
93 tf->tf_rbp = 0;
94 tf->tf_rsp = stack;
95 tf->tf_r8 = 0;
96 tf->tf_r9 = 0;
97 tf->tf_r10 = 0;
98 tf->tf_r11 = 0;
99 tf->tf_r12 = 0;
100 tf->tf_r13 = 0;
101 tf->tf_r14 = 0;
102 tf->tf_r15 = 0;
103 tf->tf_rip = epp->ep_entry;
104 tf->tf_rflags = PSL_MBO | PSL_I;
105 tf->tf_cs = LSEL(LUCODE_SEL, SEL_UPL);
106 tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL);
107 tf->tf_ds = 0;
108 tf->tf_es = 0;
109 tf->tf_fs = 0;
110 tf->tf_gs = 0;
111
112 return;
113 }
114
115 void
116 linux_sendsig(ksi, mask)
117 const ksiginfo_t *ksi;
118 const sigset_t *mask;
119 {
120 struct lwp *l = curlwp;
121 struct proc *p = l->l_proc;
122 struct sigacts *ps = p->p_sigacts;
123 int onstack;
124 int sig = ksi->ksi_signo;
125 struct linux_rt_sigframe *sfp, sigframe;
126 struct linux__fpstate *fpsp, fpstate;
127 struct fpreg fpregs;
128 struct trapframe *tf = l->l_md.md_regs;
129 sig_t catcher = SIGACTION(p, sig).sa_handler;
130 linux_sigset_t lmask;
131 char *sp;
132 int error;
133
134 /* Do we need to jump onto the signal stack? */
135 onstack =
136 (p->p_sigctx.ps_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
137 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
138
139 /* Allocate space for the signal handler context. */
140 if (onstack)
141 sp = ((caddr_t)p->p_sigctx.ps_sigstk.ss_sp +
142 p->p_sigctx.ps_sigstk.ss_size);
143 else
144 sp = (caddr_t)tf->tf_rsp - 128;
145
146
147 /*
148 * Save FPU state, if any
149 */
150 if (l->l_md.md_flags & MDP_USEDFPU) {
151 sp = (char *)
152 (((long)sp - sizeof(struct linux__fpstate)) & ~0xfUL);
153 fpsp = (struct linux__fpstate *)sp;
154
155 (void)process_read_fpregs(l, &fpregs);
156 bzero(&fpstate, sizeof(fpstate));
157
158 fpstate.cwd = fpregs.fp_fcw;
159 fpstate.swd = fpregs.fp_fsw;
160 fpstate.twd = fpregs.fp_ftw;
161 fpstate.fop = fpregs.fp_fop;
162 fpstate.rip = fpregs.fp_rip;
163 fpstate.rdp = fpregs.fp_rdp;
164 fpstate.mxcsr = fpregs.fp_mxcsr;
165 fpstate.mxcsr_mask = fpregs.fp_mxcsr_mask;
166 memcpy(&fpstate.st_space, &fpregs.fp_st,
167 sizeof(fpstate.st_space));
168 memcpy(&fpstate.xmm_space, &fpregs.fp_xmm,
169 sizeof(fpstate.xmm_space));
170
171 if ((error = copyout(&fpstate, fpsp, sizeof(fpstate))) != 0) {
172 sigexit(l, SIGILL);
173 return;
174 }
175 } else {
176 fpsp = NULL;
177 }
178
179 /*
180 * Populate the rt_sigframe
181 */
182 sp = (char *)
183 ((((long)sp - sizeof(struct linux_rt_sigframe)) & ~0xfUL) - 8);
184 sfp = (struct linux_rt_sigframe *)sp;
185
186 bzero(&sigframe, sizeof(sigframe));
187 if (ps->sa_sigdesc[sig].sd_vers != 0)
188 sigframe.pretcode = (char *)ps->sa_sigdesc[sig].sd_tramp;
189 else
190 sigframe.pretcode = NULL;
191
192 /*
193 * The user context
194 */
195 sigframe.uc.luc_flags = 0;
196 sigframe.uc.luc_link = NULL;
197
198 /* This is used regardless of SA_ONSTACK in Linux */
199 sigframe.uc.luc_stack.ss_sp = p->p_sigctx.ps_sigstk.ss_sp;
200 sigframe.uc.luc_stack.ss_size = p->p_sigctx.ps_sigstk.ss_size;
201 sigframe.uc.luc_stack.ss_flags = 0;
202 if (p->p_sigctx.ps_sigstk.ss_flags & SS_ONSTACK)
203 sigframe.uc.luc_stack.ss_flags |= LINUX_SS_ONSTACK;
204 if (p->p_sigctx.ps_sigstk.ss_flags & SS_DISABLE)
205 sigframe.uc.luc_stack.ss_flags |= LINUX_SS_DISABLE;
206
207 sigframe.uc.luc_mcontext.r8 = tf->tf_r8;
208 sigframe.uc.luc_mcontext.r9 = tf->tf_r9;
209 sigframe.uc.luc_mcontext.r10 = tf->tf_r10;
210 sigframe.uc.luc_mcontext.r11 = tf->tf_r11;
211 sigframe.uc.luc_mcontext.r12 = tf->tf_r12;
212 sigframe.uc.luc_mcontext.r13 = tf->tf_r13;
213 sigframe.uc.luc_mcontext.r14 = tf->tf_r14;
214 sigframe.uc.luc_mcontext.r15 = tf->tf_r15;
215 sigframe.uc.luc_mcontext.rdi = tf->tf_rdi;
216 sigframe.uc.luc_mcontext.rsi = tf->tf_rsi;
217 sigframe.uc.luc_mcontext.rbp = tf->tf_rbp;
218 sigframe.uc.luc_mcontext.rbx = tf->tf_rbx;
219 sigframe.uc.luc_mcontext.rdx = tf->tf_rdx;
220 sigframe.uc.luc_mcontext.rcx = tf->tf_rcx;
221 sigframe.uc.luc_mcontext.rsp = tf->tf_rsp;
222 sigframe.uc.luc_mcontext.eflags = tf->tf_rflags;
223 sigframe.uc.luc_mcontext.cs = tf->tf_cs;
224 sigframe.uc.luc_mcontext.gs = tf->tf_gs;
225 sigframe.uc.luc_mcontext.fs = tf->tf_fs;
226 sigframe.uc.luc_mcontext.err = tf->tf_err;
227 sigframe.uc.luc_mcontext.trapno = tf->tf_trapno;
228 native_to_linux_sigset(&lmask, mask);
229 sigframe.uc.luc_mcontext.oldmask = lmask.sig[0];
230 sigframe.uc.luc_mcontext.cr2 = (long)l->l_addr->u_pcb.pcb_onfault;
231 sigframe.uc.luc_mcontext.fpstate = fpsp;
232 native_to_linux_sigset(&sigframe.uc.luc_sigmask, mask);
233
234 /*
235 * the siginfo structure
236 */
237 sigframe.info.lsi_signo = native_to_linux_signo[sig];
238 sigframe.info.lsi_errno = native_to_linux_errno[ksi->ksi_errno];
239 sigframe.info.lsi_code = ksi->ksi_code;
240
241 /* XXX This is a rought conversion, taken from i386 code */
242 switch (sigframe.info.lsi_signo) {
243 case LINUX_SIGILL:
244 case LINUX_SIGFPE:
245 case LINUX_SIGSEGV:
246 case LINUX_SIGBUS:
247 case LINUX_SIGTRAP:
248 sigframe.info._sifields._sigfault._addr = ksi->ksi_addr;
249 break;
250 case LINUX_SIGCHLD:
251 sigframe.info._sifields._sigchld._pid = ksi->ksi_pid;
252 sigframe.info._sifields._sigchld._uid = ksi->ksi_uid;
253 sigframe.info._sifields._sigchld._status = ksi->ksi_status;
254 sigframe.info._sifields._sigchld._utime = ksi->ksi_utime;
255 sigframe.info._sifields._sigchld._stime = ksi->ksi_stime;
256 break;
257 case LINUX_SIGIO:
258 sigframe.info._sifields._sigpoll._band = ksi->ksi_band;
259 sigframe.info._sifields._sigpoll._fd = ksi->ksi_fd;
260 break;
261 default:
262 sigframe.info._sifields._sigchld._pid = ksi->ksi_pid;
263 sigframe.info._sifields._sigchld._uid = ksi->ksi_uid;
264 if ((sigframe.info.lsi_signo == LINUX_SIGALRM) ||
265 (sigframe.info.lsi_signo >= LINUX_SIGRTMIN))
266 sigframe.info._sifields._timer._sigval.sival_ptr =
267 ksi->ksi_sigval.sival_ptr;
268 break;
269 }
270
271 if ((error = copyout(&sigframe, sp, sizeof(sigframe))) != 0) {
272 sigexit(l, SIGILL);
273 return;
274 }
275
276 /*
277 * Setup registers
278 * XXX for an unknown reason, the stack is shifted of 24 bytes
279 * when the signal handler is called. The +24 below is a dirty
280 * workaround, and the real problem should be fixed.
281 */
282 buildcontext(l, catcher, sp + 24);
283 tf->tf_rdi = sigframe.info.lsi_signo;
284 tf->tf_rax = 0;
285 tf->tf_rsi = (long)&sfp->info;
286 tf->tf_rdx = (long)&sfp->uc;
287
288 /*
289 * Remember we use signal stack
290 */
291 if (onstack)
292 p->p_sigctx.ps_sigstk.ss_flags |= SS_ONSTACK;
293 return;
294 }
295
/*
 * linux_sys_modify_ldt: placeholder for the Linux modify_ldt system call.
 * No argument is examined; the call always reports success.
 */
int
linux_sys_modify_ldt(struct lwp *l, void *v, register_t *retval)
{
	/* Nothing is implemented: pretend the request succeeded. */
	return 0;
}
304
/*
 * linux_sys_iopl: placeholder for the Linux iopl system call.
 * No argument is examined; the call always reports success.
 */
int
linux_sys_iopl(struct lwp *l, void *v, register_t *retval)
{
	/* Nothing is implemented: pretend the request succeeded. */
	return 0;
}
313
/*
 * linux_sys_ioperm: placeholder for the Linux ioperm system call.
 * No argument is examined; the call always reports success.
 */
int
linux_sys_ioperm(struct lwp *l, void *v, register_t *retval)
{
	/* Nothing is implemented: pretend the request succeeded. */
	return 0;
}
322
/*
 * linux_fakedev: placeholder device number translation.
 * Both arguments are ignored and 0 is returned for every device.
 */
dev_t
linux_fakedev(dev_t dev, int raw)
{
	/* No translation is implemented on this port. */
	return 0;
}
330
/*
 * linux_machdepioctl: placeholder for machine-dependent Linux ioctls.
 * No argument is examined; the call always reports success.
 */
int
linux_machdepioctl(struct proc *p, void *v, register_t *retval)
{
	/* Nothing is implemented: pretend the request succeeded. */
	return 0;
}
339
340 int
341 linux_sys_rt_sigreturn(l, v, retval)
342 struct lwp *l;
343 void *v;
344 register_t *retval;
345 {
346 struct linux_sys_rt_sigreturn_args /* {
347 syscallarg(struct linux_ucontext *) ucp;
348 } */ *uap = v;
349 struct linux_ucontext luctx;
350 struct trapframe *tf = l->l_md.md_regs;
351 struct linux_sigcontext *lsigctx;
352 struct linux__fpstate fpstate;
353 ucontext_t uctx;
354 mcontext_t *mctx;
355 struct fxsave64 *fxsave;
356 int error;
357
358 if ((error = copyin(SCARG(uap, ucp), &luctx, sizeof(luctx))) != 0) {
359 sigexit(l, SIGILL);
360 return error;
361 }
362 lsigctx = &luctx.luc_mcontext;
363
364 bzero(&uctx, sizeof(uctx));
365 mctx = (mcontext_t *)&uctx.uc_mcontext;
366 fxsave = (struct fxsave64 *)&mctx->__fpregs;
367
368 /*
369 * Set the flags. Linux always have CPU, stack and signal state,
370 * FPU is optional. uc_flags is not used to tell what we have.
371 */
372 uctx.uc_flags = (_UC_SIGMASK|_UC_CPU|_UC_STACK|_UC_CLRSTACK);
373 if (lsigctx->fpstate != NULL)
374 uctx.uc_flags |= _UC_FPU;
375 uctx.uc_link = NULL;
376
377 /*
378 * Signal set
379 */
380 linux_to_native_sigset(&uctx.uc_sigmask, &luctx.luc_sigmask);
381
382 /*
383 * CPU state
384 */
385 mctx->__gregs[_REG_R8] = lsigctx->r8;
386 mctx->__gregs[_REG_R9] = lsigctx->r9;
387 mctx->__gregs[_REG_R10] = lsigctx->r10;
388 mctx->__gregs[_REG_R11] = lsigctx->r11;
389 mctx->__gregs[_REG_R12] = lsigctx->r12;
390 mctx->__gregs[_REG_R13] = lsigctx->r13;
391 mctx->__gregs[_REG_R14] = lsigctx->r14;
392 mctx->__gregs[_REG_R15] = lsigctx->r15;
393 mctx->__gregs[_REG_RDI] = lsigctx->rdi;
394 mctx->__gregs[_REG_RSI] = lsigctx->rsi;
395 mctx->__gregs[_REG_RBP] = lsigctx->rbp;
396 mctx->__gregs[_REG_RBX] = lsigctx->rbx;
397 mctx->__gregs[_REG_RAX] = tf->tf_rax;
398 mctx->__gregs[_REG_RDX] = lsigctx->rdx;
399 mctx->__gregs[_REG_RCX] = lsigctx->rcx;
400 mctx->__gregs[_REG_RIP] = lsigctx->rip;
401 mctx->__gregs[_REG_RFL] = lsigctx->eflags;
402 mctx->__gregs[_REG_CS] = lsigctx->cs;
403 mctx->__gregs[_REG_GS] = lsigctx->gs;
404 mctx->__gregs[_REG_FS] = lsigctx->fs;
405 mctx->__gregs[_REG_ERR] = lsigctx->err;
406 mctx->__gregs[_REG_TRAPNO] = lsigctx->trapno;
407 mctx->__gregs[_REG_ES] = tf->tf_es;
408 mctx->__gregs[_REG_DS] = tf->tf_ds;
409 mctx->__gregs[_REG_URSP] = lsigctx->rsp; /* XXX */
410 mctx->__gregs[_REG_SS] = tf->tf_ss;
411
412 /*
413 * FPU state
414 */
415 if (lsigctx->fpstate != NULL) {
416 error = copyin(lsigctx->fpstate, &fpstate, sizeof(fpstate));
417 if (error != 0) {
418 sigexit(l, SIGILL);
419 return error;
420 }
421
422 fxsave->fx_fcw = fpstate.cwd;
423 fxsave->fx_fsw = fpstate.swd;
424 fxsave->fx_ftw = fpstate.twd;
425 fxsave->fx_fop = fpstate.fop;
426 fxsave->fx_rip = fpstate.rip;
427 fxsave->fx_rdp = fpstate.rdp;
428 fxsave->fx_mxcsr = fpstate.mxcsr;
429 fxsave->fx_mxcsr_mask = fpstate.mxcsr_mask;
430 memcpy(&fxsave->fx_st, &fpstate.st_space,
431 sizeof(fxsave->fx_st));
432 memcpy(&fxsave->fx_xmm, &fpstate.xmm_space,
433 sizeof(fxsave->fx_xmm));
434 }
435
436 /*
437 * And the stack
438 */
439 uctx.uc_stack.ss_flags = 0;
440 if (luctx.luc_stack.ss_flags & LINUX_SS_ONSTACK);
441 uctx.uc_stack.ss_flags = SS_ONSTACK;
442
443 if (luctx.luc_stack.ss_flags & LINUX_SS_DISABLE);
444 uctx.uc_stack.ss_flags = SS_DISABLE;
445
446 uctx.uc_stack.ss_sp = luctx.luc_stack.ss_sp;
447 uctx.uc_stack.ss_size = luctx.luc_stack.ss_size;
448
449 /*
450 * And let setucontext deal with that.
451 */
452 return setucontext(l, &uctx);
453 }
454
455 int
456 linux_sys_arch_prctl(l, v, retval)
457 struct lwp *l;
458 void *v;
459 register_t *retval;
460 {
461 struct linux_sys_arch_prctl_args /* {
462 syscallarg(int) code;
463 syscallarg(unsigned long) addr;
464 } */ *uap = v;
465 struct pcb *pcb = &l->l_addr->u_pcb;
466 struct trapframe *tf = l->l_md.md_regs;
467 int error;
468 uint64_t taddr;
469
470 switch(SCARG(uap, code)) {
471 case LINUX_ARCH_SET_GS:
472 taddr = SCARG(uap, addr);
473 if (taddr >= VM_MAXUSER_ADDRESS)
474 return EINVAL;
475 pcb->pcb_gs = taddr;
476 pcb->pcb_flags |= PCB_GS64;
477 if (l == curlwp)
478 wrmsr(MSR_KERNELGSBASE, taddr);
479 break;
480
481 case LINUX_ARCH_GET_GS:
482 if (pcb->pcb_flags & PCB_GS64)
483 taddr = pcb->pcb_gs;
484 else {
485 error = memseg_baseaddr(l, tf->tf_fs, NULL, 0, &taddr);
486 if (error != 0)
487 return error;
488 }
489 error = copyout(&taddr, (char *)SCARG(uap, addr), 8);
490 if (error != 0)
491 return error;
492 break;
493
494 case LINUX_ARCH_SET_FS:
495 taddr = SCARG(uap, addr);
496 if (taddr >= VM_MAXUSER_ADDRESS)
497 return EINVAL;
498 pcb->pcb_fs = taddr;
499 pcb->pcb_flags |= PCB_FS64;
500 if (l == curlwp)
501 wrmsr(MSR_FSBASE, taddr);
502 break;
503
504 case LINUX_ARCH_GET_FS:
505 if (pcb->pcb_flags & PCB_FS64)
506 taddr = pcb->pcb_fs;
507 else {
508 error = memseg_baseaddr(l, tf->tf_fs, NULL, 0, &taddr);
509 if (error != 0)
510 return error;
511 }
512 error = copyout(&taddr, (char *)SCARG(uap, addr), 8);
513 if (error != 0)
514 return error;
515 break;
516
517 default:
518 #ifdef DEBUG_LINUX
519 printf("linux_sys_arch_prctl: unexpected code %d\n",
520 SCARG(uap, code));
521 #endif
522 return EINVAL;
523 }
524
525 return 0;
526 }
527