linux_exec_machdep.c revision 1.10 1 /* $NetBSD: linux_exec_machdep.c,v 1.10 2008/10/26 17:57:49 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2004 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Christos Zoulas.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_exec_machdep.c,v 1.10 2008/10/26 17:57:49 christos Exp $");
34
35 #if defined(_KERNEL_OPT)
36 #include "opt_vm86.h"
37 #include "opt_user_ldt.h"
38 #endif
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/resource.h>
43 #include <sys/proc.h>
44 #include <sys/conf.h>
45 #include <sys/exec.h>
46 #include <sys/exec_elf.h>
47 #include <sys/vnode.h>
48 #include <sys/lwp.h>
49
50 #include <sys/cpu.h>
51 #include <machine/vmparam.h>
52
53 #include <uvm/uvm.h>
54
55 #include <sys/syscallargs.h>
56
57 #ifndef DEBUG_LINUX
58 #define DPRINTF(a)
59 #else
60 #define DPRINTF(a) uprintf a
61 #endif
62
63 #include <compat/linux/common/linux_types.h>
64 #include <compat/linux/common/linux_signal.h>
65 #include <compat/linux/common/linux_machdep.h>
66 #include <compat/linux/common/linux_util.h>
67 #include <compat/linux/common/linux_ioctl.h>
68 #include <compat/linux/common/linux_hdio.h>
69 #include <compat/linux/common/linux_exec.h>
70 #include <compat/linux/common/linux_errno.h>
71 #include <compat/linux//linux_syscallargs.h>
72
73
74 int
75 linux_exec_setup_stack(struct lwp *l, struct exec_package *epp)
76 {
77 u_long max_stack_size;
78 u_long access_linear_min, access_size;
79 u_long noaccess_linear_min, noaccess_size;
80
81 #ifndef USRSTACK32
82 #define USRSTACK32 (0x00000000ffffffffL&~PGOFSET)
83 #endif
84
85 if (epp->ep_flags & EXEC_32) {
86 epp->ep_minsaddr = USRSTACK32;
87 max_stack_size = MAXSSIZ;
88 } else {
89 epp->ep_minsaddr = USRSTACK;
90 max_stack_size = MAXSSIZ;
91 }
92
93 if (epp->ep_minsaddr > LINUX_USRSTACK)
94 epp->ep_minsaddr = LINUX_USRSTACK;
95 #ifdef DEBUG_LINUX
96 else {
97 /*
98 * Someone needs to make KERNBASE and TEXTADDR
99 * java versions < 1.4.2 need the stack to be
100 * at 0xC0000000
101 */
102 uprintf("Cannot setup stack to 0xC0000000, "
103 "java will not work properly\n");
104 }
105 #endif
106 epp->ep_maxsaddr = (u_long)STACK_GROW(epp->ep_minsaddr,
107 max_stack_size);
108 epp->ep_ssize = l->l_proc->p_rlimit[RLIMIT_STACK].rlim_cur;
109
110 /*
111 * set up commands for stack. note that this takes *two*, one to
112 * map the part of the stack which we can access, and one to map
113 * the part which we can't.
114 *
115 * arguably, it could be made into one, but that would require the
116 * addition of another mapping proc, which is unnecessary
117 */
118 access_size = epp->ep_ssize;
119 access_linear_min = (u_long)STACK_ALLOC(epp->ep_minsaddr, access_size);
120 noaccess_size = max_stack_size - access_size;
121 noaccess_linear_min = (u_long)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
122 access_size), noaccess_size);
123 if (noaccess_size > 0) {
124 NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size,
125 noaccess_linear_min, NULLVP, 0, VM_PROT_NONE);
126 }
127 KASSERT(access_size > 0);
128 NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, access_size,
129 access_linear_min, NULLVP, 0, VM_PROT_READ | VM_PROT_WRITE);
130
131 return 0;
132 }
133
134
135 #ifdef LINUX_NPTL
136 static __inline void
137 load_gs(u_int sel)
138 {
139 __asm __volatile("movl %0,%%gs" : : "rm" (sel));
140 }
141
142
143 int
144 linux_init_thread_area(struct lwp *l, struct lwp *l2)
145 {
146 struct trapframe *tf = l->l_md.md_regs, *tf2 = l2->l_md.md_regs;
147 struct pcb *pcb2 = &l2->l_addr->u_pcb;
148 struct linux_user_desc info;
149 struct segment_descriptor sd;
150 int error, idx, a[2];
151
152 error = copyin((void *)tf->tf_esi, &info, sizeof(info));
153 if (error)
154 return error;
155 idx = info.entry_number;
156
157 /*
158 * looks like we're getting the idx we returned
159 * in the set_thread_area() syscall
160 */
161 if (idx != LINUX_GLIBC_TLS_SEL && idx != GUGS_SEL) {
162 DPRINTF(("resetting idx %d to GUGS_SEL", idx));
163 idx = GUGS_SEL;
164 }
165
166 /* this doesnt happen in practice */
167 if (idx == LINUX_GLIBC_TLS_SEL) {
168 /* we might copy out the entry_number as 3 */
169 info.entry_number = GUGS_SEL;
170 error = copyout(&info, (void *)tf->tf_esi, sizeof(info));
171 if (error)
172 return error;
173 }
174
175 a[0] = LINUX_LDT_entry_a(&info);
176 a[1] = LINUX_LDT_entry_b(&info);
177
178 (void)memcpy(&sd, &a, sizeof(a));
179 KASSERT(ISMEMSDP((&sd)));
180 DPRINTF(("Segment created in clone with CLONE_SETTLS: lobase: %x, "
181 "hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, "
182 "xx: %i, def32: %i, gran: %i\n", sd.sd_lobase,
183 sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, sd.sd_type, sd.sd_dpl,
184 sd.sd_p, sd.sd_xx, sd.sd_def32, sd.sd_gran));
185
186 kpreempt_disable();
187 (void)memcpy(&pcb2->pcb_gsd, &sd, sizeof(sd));
188 (void)memcpy(&curcpu()->ci_gdt[GUGS_SEL], &sd, sizeof(sd));
189 tf2->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
190 load_gs(tf2->tf_gs);
191 kpreempt_enable();
192 return 0;
193 }
194
195
196 int
197 linux_sys_set_thread_area(struct lwp *l,
198 const struct linux_sys_set_thread_area_args *uap, register_t *retval)
199 {
200 struct trapframe *tf = l->l_md.md_regs;
201 struct pcb *pcb = &l->l_addr->u_pcb;
202 struct linux_user_desc info;
203 struct segment_descriptor sd;
204 int error, idx, a[2];
205
206 *retval = 0;
207 error = copyin(SCARG(uap, desc), &info, sizeof(info));
208 if (error)
209 return error;
210
211 DPRINTF(("set thread area: %i, %x, %x, %i, %i, %i, %i, %i, %i\n",
212 info.entry_number, info.base_addr, info.limit, info.seg_32bit,
213 info.contents, info.read_exec_only, info.limit_in_pages,
214 info.seg_not_present, info.useable));
215
216 idx = info.entry_number;
217 /*
218 * Semantics of linux version: every thread in the system has array of
219 * 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This
220 * syscall loads one of the selected tls decriptors with a value and
221 * also loads GDT descriptors 6, 7 and 8 with the content of the
222 * per-thread descriptors.
223 *
224 * Semantics of fbsd version: I think we can ignore that linux has 3
225 * per-thread descriptors and use just the 1st one. The tls_array[]
226 * is used only in set/get-thread_area() syscalls and for loading the
227 * GDT descriptors. In fbsd we use just one GDT descriptor for TLS so
228 * we will load just one.
229 *
230 * XXX: this doesn't work when a user space process tries to use more
231 * than 1 TLS segment. Comment in the linux sources says wine might do
232 * this.
233 */
234
235 /*
236 * we support just GLIBC TLS now
237 * we should let 3 proceed as well because we use this segment so
238 * if code does two subsequent calls it should succeed
239 */
240 if (idx != LINUX_GLIBC_TLS_SEL && idx != -1 && idx != GUGS_SEL)
241 return EINVAL;
242
243 /*
244 * we have to copy out the GDT entry we use
245 * FreeBSD uses GDT entry #3 for storing %gs so load that
246 *
247 * XXX: what if a user space program doesn't check this value and tries
248 * to use 6, 7 or 8?
249 */
250 idx = info.entry_number = GUGS_SEL;
251 error = copyout(&info, SCARG(uap, desc), sizeof(info));
252 if (error)
253 return error;
254
255 if (LINUX_LDT_empty(&info)) {
256 a[0] = 0;
257 a[1] = 0;
258 } else {
259 a[0] = LINUX_LDT_entry_a(&info);
260 a[1] = LINUX_LDT_entry_b(&info);
261 }
262
263 (void)memcpy(&sd, &a, sizeof(a));
264 KASSERT(ISMEMSDP((&sd)));
265 DPRINTF(("Segment created in set_thread_area: lobase: %x, hibase: %x, "
266 "lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, "
267 "def32: %i, gran: %i\n", sd.sd_lobase, sd.sd_hibase, sd.sd_lolimit,
268 sd.sd_hilimit, sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx,
269 sd.sd_def32, sd.sd_gran));
270
271 kpreempt_disable();
272 (void)memcpy(&pcb->pcb_gsd, &sd, sizeof(sd));
273 (void)memcpy(&curcpu()->ci_gdt[GUGS_SEL], &sd, sizeof(sd));
274 tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
275 load_gs(tf->tf_gs);
276 kpreempt_enable();
277 return 0;
278 }
279
280 int
281 linux_sys_get_thread_area(struct lwp *l,
282 const struct linux_sys_get_thread_area_args *uap, register_t *retval)
283 {
284 struct pcb *pcb = &l->l_addr->u_pcb;
285 struct linux_user_desc info;
286 struct linux_desc_struct desc;
287 struct segment_descriptor sd;
288 int error, idx;
289
290 *retval = 0;
291 error = copyin(SCARG(uap, desc), &info, sizeof(info));
292 if (error)
293 return error;
294
295 idx = info.entry_number;
296 /* XXX: I am not sure if we want 3 to be allowed too. */
297 if (idx != LINUX_GLIBC_TLS_SEL && idx != GUGS_SEL)
298 return EINVAL;
299
300 idx = GUGS_SEL;
301
302 (void)memset(&info, 0, sizeof(info));
303 (void)memcpy(&sd, pcb->pcb_gsd, sizeof(sd));
304 (void)memcpy(&desc, &sd, sizeof(desc));
305
306 info.entry_number = idx;
307 info.base_addr = LINUX_GET_BASE(&desc);
308 info.limit = LINUX_GET_LIMIT(&desc);
309 info.seg_32bit = LINUX_GET_32BIT(&desc);
310 info.contents = LINUX_GET_CONTENTS(&desc);
311 info.read_exec_only = !LINUX_GET_WRITABLE(&desc);
312 info.limit_in_pages = LINUX_GET_LIMIT_PAGES(&desc);
313 info.seg_not_present = !LINUX_GET_PRESENT(&desc);
314 info.useable = LINUX_GET_USEABLE(&desc);
315
316 return copyout(&info, SCARG(uap, desc), sizeof(info));
317 }
318
319 #endif
320