Home | History | Annotate | Line # | Download | only in i386
linux_exec_machdep.c revision 1.6
      1 /*	$NetBSD: linux_exec_machdep.c,v 1.6 2008/10/25 23:38:28 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2004 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Christos Zoulas.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_exec_machdep.c,v 1.6 2008/10/25 23:38:28 christos Exp $");
     34 
     35 #if defined(_KERNEL_OPT)
     36 #include "opt_vm86.h"
     37 #include "opt_user_ldt.h"
     38 #endif
     39 
     40 #include <sys/param.h>
     41 #include <sys/systm.h>
     42 #include <sys/resource.h>
     43 #include <sys/proc.h>
     44 #include <sys/conf.h>
     45 #include <sys/exec.h>
     46 #include <sys/exec_elf.h>
     47 #include <sys/vnode.h>
     48 #include <sys/lwp.h>
     49 
     50 #include <sys/cpu.h>
     51 #include <machine/vmparam.h>
     52 
     53 #include <uvm/uvm.h>
     54 
     55 #include <sys/syscallargs.h>
     56 
     57 #ifndef DEBUG_LINUX
     58 #define DPRINTF(a)
     59 #else
     60 #define DPRINTF(a)	uprintf a
     61 #endif
     62 
     63 #include <compat/linux/common/linux_types.h>
     64 #include <compat/linux/common/linux_signal.h>
     65 #include <compat/linux/common/linux_machdep.h>
     66 #include <compat/linux/common/linux_util.h>
     67 #include <compat/linux/common/linux_ioctl.h>
     68 #include <compat/linux/common/linux_hdio.h>
     69 #include <compat/linux/common/linux_exec.h>
     70 #include <compat/linux/common/linux_errno.h>
     71 #include <compat/linux//linux_syscallargs.h>
     72 
     73 int
     74 linux_exec_setup_stack(struct lwp *l, struct exec_package *epp)
     75 {
     76 	u_long max_stack_size;
     77 	u_long access_linear_min, access_size;
     78 	u_long noaccess_linear_min, noaccess_size;
     79 
     80 #ifndef	USRSTACK32
     81 #define USRSTACK32	(0x00000000ffffffffL&~PGOFSET)
     82 #endif
     83 
     84 	if (epp->ep_flags & EXEC_32) {
     85 		epp->ep_minsaddr = USRSTACK32;
     86 		max_stack_size = MAXSSIZ;
     87 	} else {
     88 		epp->ep_minsaddr = USRSTACK;
     89 		max_stack_size = MAXSSIZ;
     90 	}
     91 
     92 	if (epp->ep_minsaddr > LINUX_USRSTACK)
     93 		epp->ep_minsaddr = LINUX_USRSTACK;
     94 #ifdef DEBUG_LINUX
     95 	else {
     96 		/*
     97 		 * Someone needs to make KERNBASE and TEXTADDR
     98 		 * java versions < 1.4.2 need the stack to be
     99 		 * at 0xC0000000
    100 		 */
    101 		uprintf("Cannot setup stack to 0xC0000000, "
    102 		    "java will not work properly\n");
    103 	}
    104 #endif
    105 	epp->ep_maxsaddr = (u_long)STACK_GROW(epp->ep_minsaddr,
    106 		max_stack_size);
    107 	epp->ep_ssize = l->l_proc->p_rlimit[RLIMIT_STACK].rlim_cur;
    108 
    109 	/*
    110 	 * set up commands for stack.  note that this takes *two*, one to
    111 	 * map the part of the stack which we can access, and one to map
    112 	 * the part which we can't.
    113 	 *
    114 	 * arguably, it could be made into one, but that would require the
    115 	 * addition of another mapping proc, which is unnecessary
    116 	 */
    117 	access_size = epp->ep_ssize;
    118 	access_linear_min = (u_long)STACK_ALLOC(epp->ep_minsaddr, access_size);
    119 	noaccess_size = max_stack_size - access_size;
    120 	noaccess_linear_min = (u_long)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
    121 	    access_size), noaccess_size);
    122 	if (noaccess_size > 0) {
    123 		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size,
    124 		    noaccess_linear_min, NULLVP, 0, VM_PROT_NONE);
    125 	}
    126 	KASSERT(access_size > 0);
    127 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, access_size,
    128 	    access_linear_min, NULLVP, 0, VM_PROT_READ | VM_PROT_WRITE);
    129 
    130 	return 0;
    131 }
    132 
    133 
    134 #ifdef LINUX_NPTL
    135 
    136 int
    137 linux_init_thread_area(struct lwp *l, struct lwp *l2)
    138 {
    139 	struct trapframe *tf = l->l_md.md_regs, *tf2 = l2->l_md.md_regs;
    140 	struct pcb *pcb2 = &l2->l_addr->u_pcb;
    141 	struct linux_user_desc info;
    142 	struct segment_descriptor sd;
    143 	int error, idx, a[2];
    144 
    145 	error = copyin((void *)tf->tf_esi, &info, sizeof(info));
    146 	if (error)
    147 		return error;
    148 	idx = info.entry_number;
    149 
    150 	/*
    151 	 * looks like we're getting the idx we returned
    152 	 * in the set_thread_area() syscall
    153 	 */
    154 	if (idx != 6 && idx != 3) {
    155 		printf("resetting idx %d to 3", idx);
    156 		idx = 3;
    157 	}
    158 
    159 	/* this doesnt happen in practice */
    160 	if (idx == 6) {
    161 		/* we might copy out the entry_number as 3 */
    162 		info.entry_number = 3;
    163 		error = copyout(&info, (void *)tf->tf_esi, sizeof(info));
    164 		if (error)
    165 			return error;
    166 	}
    167 
    168 	a[0] = LINUX_LDT_entry_a(&info);
    169 	a[1] = LINUX_LDT_entry_b(&info);
    170 
    171 	(void)memcpy(&sd, &a, sizeof(a));
    172 	DPRINTF(("Segment created in clone with CLONE_SETTLS: lobase: %x, "
    173 	    "hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, "
    174 	    "xx: %i, def32: %i, gran: %i\n", sd.sd_lobase,
    175 	    sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, sd.sd_type, sd.sd_dpl,
    176 	    sd.sd_p, sd.sd_xx, sd.sd_def32, sd.sd_gran));
    177 
    178 	(void)memcpy(pcb2->pcb_gsd, &sd, sizeof(pcb2->pcb_gsd));
    179 	tf2->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
    180 	return 0;
    181 }
    182 
    183 
    184 int
    185 linux_sys_set_thread_area(struct lwp *l,
    186     const struct linux_sys_set_thread_area_args *uap, register_t *retval)
    187 {
    188 	struct trapframe *tf = l->l_md.md_regs;
    189 	struct pcb *pcb = &l->l_addr->u_pcb;
    190 	struct linux_user_desc info;
    191 	struct segment_descriptor sd;
    192 	int error, idx, a[2];
    193 
    194 	*retval = 0;
    195 	error = copyin(SCARG(uap, desc), &info, sizeof(info));
    196 	if (error)
    197 		return error;
    198 
    199 	DPRINTF(("set thread area: %i, %x, %x, %i, %i, %i, %i, %i, %i\n",
    200 	    info.entry_number, info.base_addr, info.limit, info.seg_32bit,
    201 	    info.contents, info.read_exec_only, info.limit_in_pages,
    202 	    info.seg_not_present, info.useable));
    203 
    204 	idx = info.entry_number;
    205 	/*
    206 	 * Semantics of linux version: every thread in the system has array of
    207 	 * 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This
    208 	 * syscall loads one of the selected tls decriptors with a value and
    209 	 * also loads GDT descriptors 6, 7 and 8 with the content of the
    210 	 * per-thread descriptors.
    211 	 *
    212 	 * Semantics of fbsd version: I think we can ignore that linux has 3
    213 	 * per-thread descriptors and use just the 1st one. The tls_array[]
    214 	 * is used only in set/get-thread_area() syscalls and for loading the
    215 	 * GDT descriptors. In fbsd we use just one GDT descriptor for TLS so
    216 	 * we will load just one.
    217 	 *
    218 	 * XXX: this doesn't work when a user space process tries to use more
    219 	 * than 1 TLS segment. Comment in the linux sources says wine might do
    220 	 * this.
    221 	 */
    222 
    223 	/*
    224 	 * we support just GLIBC TLS now
    225 	 * we should let 3 proceed as well because we use this segment so
    226 	 * if code does two subsequent calls it should succeed
    227 	 */
    228 	if (idx != 6 && idx != -1 && idx != 3)
    229 		return EINVAL;
    230 
    231 	/*
    232 	 * we have to copy out the GDT entry we use
    233 	 * FreeBSD uses GDT entry #3 for storing %gs so load that
    234 	 *
    235 	 * XXX: what if a user space program doesn't check this value and tries
    236 	 * to use 6, 7 or 8?
    237 	 */
    238 	idx = info.entry_number = 3;
    239 	error = copyout(&info, SCARG(uap, desc), sizeof(info));
    240 	if (error)
    241 		return error;
    242 
    243 	if (LINUX_LDT_empty(&info)) {
    244 		a[0] = 0;
    245 		a[1] = 0;
    246 	} else {
    247 		a[0] = LINUX_LDT_entry_a(&info);
    248 		a[1] = LINUX_LDT_entry_b(&info);
    249 	}
    250 
    251 	(void)memcpy(&sd, &a, sizeof(a));
    252 	DPRINTF(("Segment created in set_thread_area: lobase: %x, hibase: %x, "
    253 	    "lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, "
    254 	    "def32: %i, gran: %i\n", sd.sd_lobase, sd.sd_hibase, sd.sd_lolimit,
    255 	    sd.sd_hilimit, sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx,
    256 	    sd.sd_def32, sd.sd_gran));
    257 
    258 	(void)memcpy(pcb->pcb_gsd, &sd, sizeof(pcb->pcb_gsd));
    259 	tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
    260 	return 0;
    261 }
    262 
    263 int
    264 linux_sys_get_thread_area(struct lwp *l,
    265     const struct linux_sys_get_thread_area_args *uap, register_t *retval)
    266 {
    267 	struct pcb *pcb = &l->l_addr->u_pcb;
    268 	struct linux_user_desc info;
    269 	struct linux_desc_struct desc;
    270 	struct segment_descriptor sd;
    271 	int error, idx;
    272 
    273 	*retval = 0;
    274 	error = copyin(SCARG(uap, desc), &info, sizeof(info));
    275 	if (error)
    276 		return error;
    277 
    278 	idx = info.entry_number;
    279 	/* XXX: I am not sure if we want 3 to be allowed too. */
    280 	if (idx != 6 && idx != 3)
    281 		return EINVAL;
    282 
    283 	idx = 3;
    284 
    285 	(void)memset(&info, 0, sizeof(info));
    286 	(void)memcpy(&sd, pcb->pcb_gsd, sizeof(sd));
    287 	(void)memcpy(&desc, &sd, sizeof(desc));
    288 
    289 	info.entry_number = idx;
    290 	info.base_addr = LINUX_GET_BASE(&desc);
    291 	info.limit = LINUX_GET_LIMIT(&desc);
    292 	info.seg_32bit = LINUX_GET_32BIT(&desc);
    293 	info.contents = LINUX_GET_CONTENTS(&desc);
    294 	info.read_exec_only = !LINUX_GET_WRITABLE(&desc);
    295 	info.limit_in_pages = LINUX_GET_LIMIT_PAGES(&desc);
    296 	info.seg_not_present = !LINUX_GET_PRESENT(&desc);
    297 	info.useable = LINUX_GET_USEABLE(&desc);
    298 
    299 	return copyout(&info, SCARG(uap, desc), sizeof(info));
    300 }
    301 
    302 #endif
    303