linux_exec_machdep.c revision 1.4.20.2 1 1.4.20.2 yamt /* $NetBSD: linux_exec_machdep.c,v 1.4.20.2 2009/05/04 08:12:20 yamt Exp $ */
2 1.1 christos
3 1.1 christos /*-
4 1.1 christos * Copyright (c) 2004 The NetBSD Foundation, Inc.
5 1.1 christos * All rights reserved.
6 1.1 christos *
7 1.1 christos * This code is derived from software contributed to The NetBSD Foundation
8 1.1 christos * by Christos Zoulas.
9 1.1 christos *
10 1.1 christos * Redistribution and use in source and binary forms, with or without
11 1.1 christos * modification, are permitted provided that the following conditions
12 1.1 christos * are met:
13 1.1 christos * 1. Redistributions of source code must retain the above copyright
14 1.1 christos * notice, this list of conditions and the following disclaimer.
15 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 christos * notice, this list of conditions and the following disclaimer in the
17 1.1 christos * documentation and/or other materials provided with the distribution.
18 1.1 christos *
19 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.1 christos * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.1 christos * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.1 christos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.1 christos * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.1 christos * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.1 christos * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.1 christos * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.1 christos * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.1 christos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.1 christos * POSSIBILITY OF SUCH DAMAGE.
30 1.1 christos */
31 1.1 christos
32 1.1 christos #include <sys/cdefs.h>
33 1.4.20.2 yamt __KERNEL_RCSID(0, "$NetBSD: linux_exec_machdep.c,v 1.4.20.2 2009/05/04 08:12:20 yamt Exp $");
34 1.1 christos
35 1.1 christos #if defined(_KERNEL_OPT)
36 1.1 christos #include "opt_vm86.h"
37 1.1 christos #include "opt_user_ldt.h"
38 1.1 christos #endif
39 1.1 christos
40 1.1 christos #include <sys/param.h>
41 1.1 christos #include <sys/systm.h>
42 1.1 christos #include <sys/resource.h>
43 1.1 christos #include <sys/proc.h>
44 1.1 christos #include <sys/conf.h>
45 1.1 christos #include <sys/exec.h>
46 1.1 christos #include <sys/exec_elf.h>
47 1.1 christos #include <sys/vnode.h>
48 1.1 christos #include <sys/lwp.h>
49 1.1 christos
50 1.4 ad #include <sys/cpu.h>
51 1.1 christos #include <machine/vmparam.h>
52 1.1 christos
53 1.1 christos #include <uvm/uvm.h>
54 1.1 christos
55 1.4.20.2 yamt #include <sys/syscallargs.h>
56 1.4.20.2 yamt
/*
 * DPRINTF((fmt, ...)) -- debug trace.  The argument is a complete,
 * parenthesised uprintf() argument list.  Expands to nothing unless
 * DEBUG_LINUX is defined.
 */
#ifdef DEBUG_LINUX
#define DPRINTF(a)	uprintf a
#else
#define DPRINTF(a)
#endif
62 1.4.20.2 yamt
63 1.1 christos #include <compat/linux/common/linux_types.h>
64 1.1 christos #include <compat/linux/common/linux_signal.h>
65 1.4.20.2 yamt #include <compat/linux/common/linux_machdep.h>
66 1.1 christos #include <compat/linux/common/linux_util.h>
67 1.1 christos #include <compat/linux/common/linux_ioctl.h>
68 1.1 christos #include <compat/linux/common/linux_hdio.h>
69 1.1 christos #include <compat/linux/common/linux_exec.h>
70 1.1 christos #include <compat/linux/common/linux_errno.h>
71 1.4.20.2 yamt #include <compat/linux//linux_syscallargs.h>
72 1.4.20.2 yamt
73 1.1 christos
74 1.1 christos int
75 1.3 christos linux_exec_setup_stack(struct lwp *l, struct exec_package *epp)
76 1.1 christos {
77 1.1 christos u_long max_stack_size;
78 1.1 christos u_long access_linear_min, access_size;
79 1.1 christos u_long noaccess_linear_min, noaccess_size;
80 1.1 christos
81 1.1 christos #ifndef USRSTACK32
82 1.1 christos #define USRSTACK32 (0x00000000ffffffffL&~PGOFSET)
83 1.1 christos #endif
84 1.1 christos
85 1.1 christos if (epp->ep_flags & EXEC_32) {
86 1.1 christos epp->ep_minsaddr = USRSTACK32;
87 1.1 christos max_stack_size = MAXSSIZ;
88 1.1 christos } else {
89 1.1 christos epp->ep_minsaddr = USRSTACK;
90 1.1 christos max_stack_size = MAXSSIZ;
91 1.1 christos }
92 1.1 christos
93 1.1 christos if (epp->ep_minsaddr > LINUX_USRSTACK)
94 1.1 christos epp->ep_minsaddr = LINUX_USRSTACK;
95 1.1 christos #ifdef DEBUG_LINUX
96 1.1 christos else {
97 1.1 christos /*
98 1.1 christos * Someone needs to make KERNBASE and TEXTADDR
99 1.1 christos * java versions < 1.4.2 need the stack to be
100 1.1 christos * at 0xC0000000
101 1.1 christos */
102 1.1 christos uprintf("Cannot setup stack to 0xC0000000, "
103 1.1 christos "java will not work properly\n");
104 1.1 christos }
105 1.1 christos #endif
106 1.2 perry epp->ep_maxsaddr = (u_long)STACK_GROW(epp->ep_minsaddr,
107 1.1 christos max_stack_size);
108 1.3 christos epp->ep_ssize = l->l_proc->p_rlimit[RLIMIT_STACK].rlim_cur;
109 1.1 christos
110 1.1 christos /*
111 1.1 christos * set up commands for stack. note that this takes *two*, one to
112 1.1 christos * map the part of the stack which we can access, and one to map
113 1.1 christos * the part which we can't.
114 1.1 christos *
115 1.1 christos * arguably, it could be made into one, but that would require the
116 1.1 christos * addition of another mapping proc, which is unnecessary
117 1.1 christos */
118 1.1 christos access_size = epp->ep_ssize;
119 1.1 christos access_linear_min = (u_long)STACK_ALLOC(epp->ep_minsaddr, access_size);
120 1.1 christos noaccess_size = max_stack_size - access_size;
121 1.2 perry noaccess_linear_min = (u_long)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
122 1.1 christos access_size), noaccess_size);
123 1.1 christos if (noaccess_size > 0) {
124 1.4.20.2 yamt NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size,
125 1.4.20.2 yamt noaccess_linear_min, NULLVP, 0, VM_PROT_NONE, VMCMD_STACK);
126 1.1 christos }
127 1.1 christos KASSERT(access_size > 0);
128 1.4.20.2 yamt NEW_VMCMD2(&epp->ep_vmcmds, vmcmd_map_zero, access_size,
129 1.4.20.2 yamt access_linear_min, NULLVP, 0, VM_PROT_READ | VM_PROT_WRITE,
130 1.4.20.2 yamt VMCMD_STACK);
131 1.4.20.2 yamt
132 1.4.20.2 yamt return 0;
133 1.4.20.2 yamt }
134 1.4.20.2 yamt
135 1.4.20.2 yamt
136 1.4.20.2 yamt #ifdef LINUX_NPTL
137 1.4.20.2 yamt static __inline void
138 1.4.20.2 yamt load_gs(u_int sel)
139 1.4.20.2 yamt {
140 1.4.20.2 yamt __asm __volatile("movl %0,%%gs" : : "rm" (sel));
141 1.4.20.2 yamt }
142 1.4.20.2 yamt
143 1.4.20.2 yamt
144 1.4.20.2 yamt int
145 1.4.20.2 yamt linux_init_thread_area(struct lwp *l, struct lwp *l2)
146 1.4.20.2 yamt {
147 1.4.20.2 yamt struct trapframe *tf = l->l_md.md_regs, *tf2 = l2->l_md.md_regs;
148 1.4.20.2 yamt struct pcb *pcb2 = &l2->l_addr->u_pcb;
149 1.4.20.2 yamt struct linux_user_desc info;
150 1.4.20.2 yamt struct segment_descriptor sd;
151 1.4.20.2 yamt int error, idx, a[2];
152 1.4.20.2 yamt
153 1.4.20.2 yamt error = copyin((void *)tf->tf_esi, &info, sizeof(info));
154 1.4.20.2 yamt if (error)
155 1.4.20.2 yamt return error;
156 1.4.20.2 yamt idx = info.entry_number;
157 1.4.20.2 yamt
158 1.4.20.2 yamt /*
159 1.4.20.2 yamt * looks like we're getting the idx we returned
160 1.4.20.2 yamt * in the set_thread_area() syscall
161 1.4.20.2 yamt */
162 1.4.20.2 yamt if (idx != LINUX_GLIBC_TLS_SEL && idx != GUGS_SEL) {
163 1.4.20.2 yamt DPRINTF(("resetting idx %d to GUGS_SEL", idx));
164 1.4.20.2 yamt idx = GUGS_SEL;
165 1.4.20.2 yamt }
166 1.4.20.2 yamt
167 1.4.20.2 yamt /* this doesnt happen in practice */
168 1.4.20.2 yamt if (idx == LINUX_GLIBC_TLS_SEL) {
169 1.4.20.2 yamt /* we might copy out the entry_number as 3 */
170 1.4.20.2 yamt info.entry_number = GUGS_SEL;
171 1.4.20.2 yamt error = copyout(&info, (void *)tf->tf_esi, sizeof(info));
172 1.4.20.2 yamt if (error)
173 1.4.20.2 yamt return error;
174 1.4.20.2 yamt }
175 1.4.20.2 yamt
176 1.4.20.2 yamt a[0] = LINUX_LDT_entry_a(&info);
177 1.4.20.2 yamt a[1] = LINUX_LDT_entry_b(&info);
178 1.4.20.2 yamt
179 1.4.20.2 yamt (void)memcpy(&sd, &a, sizeof(a));
180 1.4.20.2 yamt KASSERT(ISMEMSDP((&sd)));
181 1.4.20.2 yamt DPRINTF(("Segment created in clone with CLONE_SETTLS: lobase: %x, "
182 1.4.20.2 yamt "hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, "
183 1.4.20.2 yamt "xx: %i, def32: %i, gran: %i\n", sd.sd_lobase,
184 1.4.20.2 yamt sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, sd.sd_type, sd.sd_dpl,
185 1.4.20.2 yamt sd.sd_p, sd.sd_xx, sd.sd_def32, sd.sd_gran));
186 1.4.20.2 yamt
187 1.4.20.2 yamt (void)memcpy(&pcb2->pcb_gsd, &sd, sizeof(sd));
188 1.4.20.2 yamt tf2->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
189 1.1 christos
190 1.1 christos return 0;
191 1.1 christos }
192 1.4.20.2 yamt
193 1.4.20.2 yamt
194 1.4.20.2 yamt int
195 1.4.20.2 yamt linux_sys_set_thread_area(struct lwp *l,
196 1.4.20.2 yamt const struct linux_sys_set_thread_area_args *uap, register_t *retval)
197 1.4.20.2 yamt {
198 1.4.20.2 yamt struct pcb *pcb = &l->l_addr->u_pcb;
199 1.4.20.2 yamt struct linux_user_desc info;
200 1.4.20.2 yamt struct segment_descriptor sd;
201 1.4.20.2 yamt int error, idx, a[2];
202 1.4.20.2 yamt
203 1.4.20.2 yamt *retval = 0;
204 1.4.20.2 yamt error = copyin(SCARG(uap, desc), &info, sizeof(info));
205 1.4.20.2 yamt if (error)
206 1.4.20.2 yamt return error;
207 1.4.20.2 yamt
208 1.4.20.2 yamt DPRINTF(("set thread area: %i, %x, %x, %i, %i, %i, %i, %i, %i\n",
209 1.4.20.2 yamt info.entry_number, info.base_addr, info.limit, info.seg_32bit,
210 1.4.20.2 yamt info.contents, info.read_exec_only, info.limit_in_pages,
211 1.4.20.2 yamt info.seg_not_present, info.useable));
212 1.4.20.2 yamt
213 1.4.20.2 yamt idx = info.entry_number;
214 1.4.20.2 yamt /*
215 1.4.20.2 yamt * Semantics of linux version: every thread in the system has array of
216 1.4.20.2 yamt * 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This
217 1.4.20.2 yamt * syscall loads one of the selected tls decriptors with a value and
218 1.4.20.2 yamt * also loads GDT descriptors 6, 7 and 8 with the content of the
219 1.4.20.2 yamt * per-thread descriptors.
220 1.4.20.2 yamt *
221 1.4.20.2 yamt * Semantics of fbsd version: I think we can ignore that linux has 3
222 1.4.20.2 yamt * per-thread descriptors and use just the 1st one. The tls_array[]
223 1.4.20.2 yamt * is used only in set/get-thread_area() syscalls and for loading the
224 1.4.20.2 yamt * GDT descriptors. In fbsd we use just one GDT descriptor for TLS so
225 1.4.20.2 yamt * we will load just one.
226 1.4.20.2 yamt *
227 1.4.20.2 yamt * XXX: this doesn't work when a user space process tries to use more
228 1.4.20.2 yamt * than 1 TLS segment. Comment in the linux sources says wine might do
229 1.4.20.2 yamt * this.
230 1.4.20.2 yamt */
231 1.4.20.2 yamt
232 1.4.20.2 yamt /*
233 1.4.20.2 yamt * we support just GLIBC TLS now
234 1.4.20.2 yamt * we should let 3 proceed as well because we use this segment so
235 1.4.20.2 yamt * if code does two subsequent calls it should succeed
236 1.4.20.2 yamt */
237 1.4.20.2 yamt if (idx != LINUX_GLIBC_TLS_SEL && idx != -1 && idx != GUGS_SEL)
238 1.4.20.2 yamt return EINVAL;
239 1.4.20.2 yamt
240 1.4.20.2 yamt /*
241 1.4.20.2 yamt * we have to copy out the GDT entry we use
242 1.4.20.2 yamt * FreeBSD uses GDT entry #3 for storing %gs so load that
243 1.4.20.2 yamt *
244 1.4.20.2 yamt * XXX: what if a user space program doesn't check this value and tries
245 1.4.20.2 yamt * to use 6, 7 or 8?
246 1.4.20.2 yamt */
247 1.4.20.2 yamt idx = info.entry_number = GUGS_SEL;
248 1.4.20.2 yamt error = copyout(&info, SCARG(uap, desc), sizeof(info));
249 1.4.20.2 yamt if (error)
250 1.4.20.2 yamt return error;
251 1.4.20.2 yamt
252 1.4.20.2 yamt if (LINUX_LDT_empty(&info)) {
253 1.4.20.2 yamt a[0] = 0;
254 1.4.20.2 yamt a[1] = 0;
255 1.4.20.2 yamt } else {
256 1.4.20.2 yamt a[0] = LINUX_LDT_entry_a(&info);
257 1.4.20.2 yamt a[1] = LINUX_LDT_entry_b(&info);
258 1.4.20.2 yamt }
259 1.4.20.2 yamt
260 1.4.20.2 yamt (void)memcpy(&sd, &a, sizeof(a));
261 1.4.20.2 yamt KASSERT(ISMEMSDP((&sd)));
262 1.4.20.2 yamt DPRINTF(("Segment created in set_thread_area: lobase: %x, hibase: %x, "
263 1.4.20.2 yamt "lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, "
264 1.4.20.2 yamt "def32: %i, gran: %i\n", sd.sd_lobase, sd.sd_hibase, sd.sd_lolimit,
265 1.4.20.2 yamt sd.sd_hilimit, sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx,
266 1.4.20.2 yamt sd.sd_def32, sd.sd_gran));
267 1.4.20.2 yamt
268 1.4.20.2 yamt kpreempt_disable();
269 1.4.20.2 yamt (void)memcpy(&pcb->pcb_gsd, &sd, sizeof(sd));
270 1.4.20.2 yamt (void)memcpy(&curcpu()->ci_gdt[GUGS_SEL], &sd, sizeof(sd));
271 1.4.20.2 yamt load_gs(GSEL(GUGS_SEL, SEL_UPL));
272 1.4.20.2 yamt kpreempt_enable();
273 1.4.20.2 yamt return 0;
274 1.4.20.2 yamt }
275 1.4.20.2 yamt
276 1.4.20.2 yamt int
277 1.4.20.2 yamt linux_sys_get_thread_area(struct lwp *l,
278 1.4.20.2 yamt const struct linux_sys_get_thread_area_args *uap, register_t *retval)
279 1.4.20.2 yamt {
280 1.4.20.2 yamt struct pcb *pcb = &l->l_addr->u_pcb;
281 1.4.20.2 yamt struct linux_user_desc info;
282 1.4.20.2 yamt struct linux_desc_struct desc;
283 1.4.20.2 yamt struct segment_descriptor sd;
284 1.4.20.2 yamt int error, idx;
285 1.4.20.2 yamt
286 1.4.20.2 yamt *retval = 0;
287 1.4.20.2 yamt error = copyin(SCARG(uap, desc), &info, sizeof(info));
288 1.4.20.2 yamt if (error)
289 1.4.20.2 yamt return error;
290 1.4.20.2 yamt
291 1.4.20.2 yamt idx = info.entry_number;
292 1.4.20.2 yamt /* XXX: I am not sure if we want 3 to be allowed too. */
293 1.4.20.2 yamt if (idx != LINUX_GLIBC_TLS_SEL && idx != GUGS_SEL)
294 1.4.20.2 yamt return EINVAL;
295 1.4.20.2 yamt
296 1.4.20.2 yamt idx = GUGS_SEL;
297 1.4.20.2 yamt
298 1.4.20.2 yamt (void)memset(&info, 0, sizeof(info));
299 1.4.20.2 yamt (void)memcpy(&sd, pcb->pcb_gsd, sizeof(sd));
300 1.4.20.2 yamt (void)memcpy(&desc, &sd, sizeof(desc));
301 1.4.20.2 yamt
302 1.4.20.2 yamt info.entry_number = idx;
303 1.4.20.2 yamt info.base_addr = LINUX_GET_BASE(&desc);
304 1.4.20.2 yamt info.limit = LINUX_GET_LIMIT(&desc);
305 1.4.20.2 yamt info.seg_32bit = LINUX_GET_32BIT(&desc);
306 1.4.20.2 yamt info.contents = LINUX_GET_CONTENTS(&desc);
307 1.4.20.2 yamt info.read_exec_only = !LINUX_GET_WRITABLE(&desc);
308 1.4.20.2 yamt info.limit_in_pages = LINUX_GET_LIMIT_PAGES(&desc);
309 1.4.20.2 yamt info.seg_not_present = !LINUX_GET_PRESENT(&desc);
310 1.4.20.2 yamt info.useable = LINUX_GET_USEABLE(&desc);
311 1.4.20.2 yamt
312 1.4.20.2 yamt return copyout(&info, SCARG(uap, desc), sizeof(info));
313 1.4.20.2 yamt }
314 1.4.20.2 yamt
315 1.4.20.2 yamt #endif
316