Home | History | Annotate | Line # | Download | only in common
linux_exec_aout.c revision 1.6
      1 /*	$NetBSD: linux_exec_aout.c,v 1.6 1995/06/11 14:56:47 fvdl Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1995 Frank van der Linden
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *      This product includes software developed for the NetBSD Project
     18  *      by Frank van der Linden
     19  * 4. The name of the author may not be used to endorse or promote products
     20  *    derived from this software without specific prior written permission
     21  *
     22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     23  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     24  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     25  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     26  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     27  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     31  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     32  *
     33  * based on kern/exec_aout.c and compat/sunos/sunos_exec.c
     34  */
     35 
     36 #include <sys/param.h>
     37 #include <sys/systm.h>
     38 #include <sys/filedesc.h>
     39 #include <sys/kernel.h>
     40 #include <sys/proc.h>
     41 #include <sys/mount.h>
     42 #include <sys/malloc.h>
     43 #include <sys/namei.h>
     44 #include <sys/vnode.h>
     45 #include <sys/file.h>
     46 #include <sys/resourcevar.h>
     47 #include <sys/wait.h>
     48 
     49 #include <sys/mman.h>
     50 #include <vm/vm.h>
     51 #include <vm/vm_param.h>
     52 #include <vm/vm_map.h>
     53 #include <vm/vm_kern.h>
     54 #include <vm/vm_pager.h>
     55 
     56 #include <machine/cpu.h>
     57 #include <machine/reg.h>
     58 #include <machine/exec.h>
     59 #include <machine/linux_machdep.h>
     60 
     61 #include <compat/linux/linux_types.h>
     62 #include <compat/linux/linux_syscall.h>
     63 #include <compat/linux/linux_syscallargs.h>
     64 #include <compat/linux/linux_util.h>
     65 #include <compat/linux/linux_exec.h>
     66 
     67 struct elf_args {
     68 	u_long  arg_entry;	/* progran entry point */
     69 	u_long  arg_interp;	/* Interpreter load address */
     70 	u_long  arg_phaddr;	/* program header address */
     71 	u_long  arg_phentsize;	/* Size of program header */
     72 	u_long  arg_phnum;	/* Number of program headers */
     73 };
     74 
     75 static void *linux_aout_copyargs __P((struct exec_package *,
     76 	struct ps_strings *, void *, void *));
     77 static void *linux_elf_copyargs __P((struct exec_package *, struct ps_strings *,
     78 	void *, void *));
     79 static int linux_elf_check_header __P((Elf32_Ehdr *, int));
     80 static void linux_elf_load_psection __P((struct exec_vmcmd_set *,
     81 	struct vnode *, Elf32_Phdr *, u_long *, u_long *, int *));
     82 static int linux_elf_set_segment __P((struct exec_package *, u_long, u_long,
     83 	int));
     84 static int linux_elf_read_from __P((struct vnode *, u_long, struct proc *,
     85 	caddr_t, int));
     86 static int linux_elf_load_file __P((struct proc *, char *,
     87 	struct exec_vmcmd_set *, u_long *, struct elf_args *, u_long *));
     88 
/*
 * Debug tracing.  DPRINTF((fmt, ...)) takes a parenthesised printf
 * argument list and expands to nothing unless DEBUG_EXEC_LINUX_ELF is
 * defined.
 */
#ifndef DEBUG_EXEC_LINUX_ELF
#define DPRINTF(x)
#else
#define DPRINTF(x) printf x
#endif

/* Truncate a down to a multiple of b; only meaningful for power-of-two b. */
#define LINUX_ELF_ALIGN(a, b) ((a) & ~((b) - 1))

/* Space for eight AuxInfo records, expressed in char * sized slots. */
#define LINUX_ELF_AUX_ARGSIZ (sizeof(AuxInfo) * 8 / sizeof(char *))

/* Extra slots for a.out images (presumably the argv and envp pointers). */
#define	LINUX_AOUT_AUX_ARGSIZ	2
     98 
     99 extern int linux_error[];
    100 extern struct sysent linux_sysent[];
    101 extern char *linux_syscallnames[];
    102 
    103 struct emul emul_linux_aout = {
    104 	"linux",
    105 	linux_error,
    106 	linux_sendsig,
    107 	LINUX_SYS_syscall,
    108 	LINUX_SYS_MAXSYSCALL,
    109 	linux_sysent,
    110 	linux_syscallnames,
    111 	LINUX_AOUT_AUX_ARGSIZ,
    112 	linux_aout_copyargs,
    113 	setregs,
    114 	linux_sigcode,
    115 	linux_esigcode,
    116 };
    117 
    118 struct emul emul_linux_elf = {
    119 	"linux",
    120 	linux_error,
    121 	linux_sendsig,
    122 	LINUX_SYS_syscall,
    123 	LINUX_SYS_MAXSYSCALL,
    124 	linux_sysent,
    125 	linux_syscallnames,
    126 	LINUX_ELF_AUX_ARGSIZ,
    127 	linux_elf_copyargs,
    128 	setregs,
    129 	linux_sigcode,
    130 	linux_esigcode,
    131 };
    132 
    133 
    134 static void *
    135 linux_aout_copyargs(pack, arginfo, stack, argp)
    136 	struct exec_package *pack;
    137 	struct ps_strings *arginfo;
    138 	void *stack;
    139 	void *argp;
    140 {
    141 	char **cpp = stack;
    142 	char **stk = stack;
    143 	char *dp, *sp;
    144 	size_t len;
    145 	void *nullp = NULL;
    146 	int argc = arginfo->ps_nargvstr;
    147 	int envc = arginfo->ps_nenvstr;
    148 
    149 	if (copyout(&argc, cpp++, sizeof(argc)))
    150 		return NULL;
    151 
    152 	/* leave room for envp and argv */
    153 	cpp += 2;
    154 	if (copyout(&cpp, &stk[1], sizeof (cpp)))
    155 		return NULL;
    156 
    157 	dp = (char *) (cpp + argc + envc + 2);
    158 	sp = argp;
    159 
    160 	/* XXX don't copy them out, remap them! */
    161 	arginfo->ps_argvstr = cpp; /* remember location of argv for later */
    162 
    163 	for (; --argc >= 0; sp += len, dp += len)
    164 		if (copyout(&dp, cpp++, sizeof(dp)) ||
    165 		    copyoutstr(sp, dp, ARG_MAX, &len))
    166 			return NULL;
    167 
    168 	if (copyout(&nullp, cpp++, sizeof(nullp)))
    169 		return NULL;
    170 
    171 	if (copyout(&cpp, &stk[2], sizeof (cpp)))
    172 		return NULL;
    173 
    174 	arginfo->ps_envstr = cpp; /* remember location of envp for later */
    175 
    176 	for (; --envc >= 0; sp += len, dp += len)
    177 		if (copyout(&dp, cpp++, sizeof(dp)) ||
    178 		    copyoutstr(sp, dp, ARG_MAX, &len))
    179 			return NULL;
    180 
    181 	if (copyout(&nullp, cpp++, sizeof(nullp)))
    182 		return NULL;
    183 
    184 	return cpp;
    185 }
    186 
    187 static void *
    188 linux_elf_copyargs(pack, arginfo, stack, argp)
    189 	struct exec_package *pack;
    190 	struct ps_strings *arginfo;
    191 	void *stack;
    192 	void *argp;
    193 {
    194 	char **cpp = stack;
    195 	char *dp, *sp;
    196 	size_t len;
    197 	void *nullp = NULL;
    198 	int argc = arginfo->ps_nargvstr;
    199 	int envc = arginfo->ps_nenvstr;
    200 	AuxInfo *a;
    201 	struct elf_args *ap;
    202 
    203 	if (copyout(&argc, cpp++, sizeof(argc)))
    204 		return NULL;
    205 
    206 	dp = (char *) (cpp + argc + envc + 2 + pack->ep_emul->e_arglen);
    207 	sp = argp;
    208 
    209 	/* XXX don't copy them out, remap them! */
    210 	arginfo->ps_argvstr = cpp; /* remember location of argv for later */
    211 
    212 	for (; --argc >= 0; sp += len, dp += len)
    213 		if (copyout(&dp, cpp++, sizeof(dp)) ||
    214 		    copyoutstr(sp, dp, ARG_MAX, &len))
    215 			return NULL;
    216 
    217 	if (copyout(&nullp, cpp++, sizeof(nullp)))
    218 		return NULL;
    219 
    220 	arginfo->ps_envstr = cpp; /* remember location of envp for later */
    221 
    222 	for (; --envc >= 0; sp += len, dp += len)
    223 		if (copyout(&dp, cpp++, sizeof(dp)) ||
    224 		    copyoutstr(sp, dp, ARG_MAX, &len))
    225 			return NULL;
    226 
    227 	if (copyout(&nullp, cpp++, sizeof(nullp)))
    228 		return NULL;
    229 
    230 	/*
    231 	 * Push extra arguments on the stack needed by dynamically
    232 	 * linked binaries
    233 	 */
    234 	a = (AuxInfo *) cpp;
    235 	if ((ap = (struct elf_args *) pack->ep_emul_arg)) {
    236 
    237 		DPRINTF(("phaddr=0x%x, phsize=%d, phnum=%d, interp=0x%x, ",
    238 			 ap->arg_phaddr, ap->arg_phentsize, ap->arg_phnum,
    239 			 ap->arg_interp));
    240 		DPRINTF((" entry=0x%x\n", ap->arg_entry));
    241 
    242 		a->au_id = AUX_phdr;
    243 		a->au_v = ap->arg_phaddr;
    244 		a++;
    245 
    246 		a->au_id = AUX_phent;
    247 		a->au_v = ap->arg_phentsize;
    248 		a++;
    249 
    250 		a->au_id = AUX_phnum;
    251 		a->au_v = ap->arg_phnum;
    252 		a++;
    253 
    254 		a->au_id = AUX_pagesz;
    255 		a->au_v = NBPG;
    256 		a++;
    257 
    258 		a->au_id = AUX_base;
    259 		a->au_v = ap->arg_interp;
    260 		a++;
    261 
    262 		a->au_id = AUX_flags;
    263 		a->au_v = 0;
    264 		a++;
    265 
    266 		a->au_id = AUX_entry;
    267 		a->au_v = ap->arg_entry;
    268 		a++;
    269 
    270 		a->au_id = AUX_null;
    271 		a->au_v = 0;
    272 		a++;
    273 
    274 		free((char *) ap, M_TEMP);
    275 	}
    276 	return a;
    277 }
    278 
    279 #ifdef DEBUG_EXEC_LINUX_ELF
    280 static void
    281 print_Ehdr(e)
    282 	Elf32_Ehdr     *e;
    283 {
    284 	printf("e_ident %s, ", e->e_ident);
    285 	printf("e_type %d, ", e->e_type);
    286 	printf("e_machine %d, ", e->e_machine);
    287 	printf("e_version %ld, ", e->e_version);
    288 	printf("e_entry %lx, ", e->e_entry);
    289 	printf("e_phoff %lx, ", e->e_phoff);
    290 	printf("e_shoff %lx, ", e->e_shoff);
    291 	printf("e_flags %lx, ", e->e_flags);
    292 	printf("e_ehsize %d, ", e->e_ehsize);
    293 	printf("e_phentsize %d, ", e->e_phentsize);
    294 	printf("e_phnum %d, ", e->e_phnum);
    295 	printf("e_shentsize %d, ", e->e_shentsize);
    296 	printf("e_shnum %d, ", e->e_shnum);
    297 	printf("e_shstrndx %d\n", e->e_shstrndx);
    298 }
    299 
    300 
    301 static void
    302 print_Phdr(p)
    303 	Elf32_Phdr     *p;
    304 {
    305 	static char    *types[] =
    306 	{
    307 		"null", "load", "dynamic", "interp",
    308 		"note", "shlib", "phdr", "entry7"
    309 	};
    310 
    311 	printf("p_type %ld [%s], ", p->p_type, types[p->p_type & 7]);
    312 	printf("p_offset %lx, ", p->p_offset);
    313 	printf("p_vaddr %lx, ", p->p_vaddr);
    314 	printf("p_paddr %lx, ", p->p_paddr);
    315 	printf("p_filesz %ld, ", p->p_filesz);
    316 	printf("p_memsz %ld, ", p->p_memsz);
    317 	printf("p_flags %lx, ", p->p_flags);
    318 	printf("p_align %ld\n", p->p_align);
    319 }
    320 #endif
    321 
    322 int
    323 exec_linux_aout_makecmds(p, epp)
    324 	struct proc *p;
    325 	struct exec_package *epp;
    326 {
    327 	struct exec *linux_ep = epp->ep_hdr;
    328 	int machtype, magic;
    329 	int error = ENOEXEC;
    330 
    331 	magic = LINUX_N_MAGIC(linux_ep);
    332 	machtype = LINUX_N_MACHTYPE(linux_ep);
    333 
    334 
    335 	if (machtype != LINUX_MID_MACHINE)
    336 		return (ENOEXEC);
    337 
    338 	switch (magic) {
    339 	case QMAGIC:
    340 		error = exec_linux_aout_prep_qmagic(p, epp);
    341 		break;
    342 	case ZMAGIC:
    343 		error = exec_linux_aout_prep_zmagic(p, epp);
    344 		break;
    345 	case NMAGIC:
    346 		error = exec_linux_aout_prep_nmagic(p, epp);
    347 		break;
    348 	case OMAGIC:
    349 		error = exec_linux_aout_prep_omagic(p, epp);
    350 		break;
    351 	}
    352 	if (error == 0)
    353 		epp->ep_emul = &emul_linux_aout;
    354 	return error;
    355 }
    356 
    357 /*
    358  * Since text starts at 0x400 in Linux ZMAGIC executables, and 0x400
    359  * is very likely not page aligned on most architectures, it is treated
    360  * as an NMAGIC here. XXX
    361  */
    362 
    363 int
    364 exec_linux_aout_prep_zmagic(p, epp)
    365 	struct proc *p;
    366 	struct exec_package *epp;
    367 {
    368 	struct exec *execp = epp->ep_hdr;
    369 
    370 	epp->ep_taddr = LINUX_N_TXTADDR(*execp, ZMAGIC);
    371 	epp->ep_tsize = execp->a_text;
    372 	epp->ep_daddr = LINUX_N_DATADDR(*execp, ZMAGIC);
    373 	epp->ep_dsize = execp->a_data + execp->a_bss;
    374 	epp->ep_entry = execp->a_entry;
    375 
    376 	/* set up command for text segment */
    377 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_text,
    378 	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, ZMAGIC),
    379 	    VM_PROT_READ|VM_PROT_EXECUTE);
    380 
    381 	/* set up command for data segment */
    382 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_data,
    383 	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, ZMAGIC),
    384 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
    385 
    386 	/* set up command for bss segment */
    387 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, execp->a_bss,
    388 	    epp->ep_daddr + execp->a_data, NULLVP, 0,
    389 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
    390 
    391 	return exec_aout_setup_stack(p, epp);
    392 }
    393 
    394 /*
    395  * exec_aout_prep_nmagic(): Prepare Linux NMAGIC package.
    396  * Not different from the normal stuff.
    397  */
    398 
    399 int
    400 exec_linux_aout_prep_nmagic(p, epp)
    401 	struct proc *p;
    402 	struct exec_package *epp;
    403 {
    404 	struct exec *execp = epp->ep_hdr;
    405 	long bsize, baddr;
    406 
    407 	epp->ep_taddr = LINUX_N_TXTADDR(*execp, NMAGIC);
    408 	epp->ep_tsize = execp->a_text;
    409 	epp->ep_daddr = LINUX_N_DATADDR(*execp, NMAGIC);
    410 	epp->ep_dsize = execp->a_data + execp->a_bss;
    411 	epp->ep_entry = execp->a_entry;
    412 
    413 	/* set up command for text segment */
    414 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_text,
    415 	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, NMAGIC),
    416 	    VM_PROT_READ|VM_PROT_EXECUTE);
    417 
    418 	/* set up command for data segment */
    419 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, execp->a_data,
    420 	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, NMAGIC),
    421 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
    422 
    423 	/* set up command for bss segment */
    424 	baddr = roundup(epp->ep_daddr + execp->a_data, NBPG);
    425 	bsize = epp->ep_daddr + epp->ep_dsize - baddr;
    426 	if (bsize > 0)
    427 		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, bsize, baddr,
    428 		    NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
    429 
    430 	return exec_aout_setup_stack(p, epp);
    431 }
    432 
    433 /*
    434  * exec_aout_prep_omagic(): Prepare Linux OMAGIC package.
    435  * Business as usual.
    436  */
    437 
    438 int
    439 exec_linux_aout_prep_omagic(p, epp)
    440 	struct proc *p;
    441 	struct exec_package *epp;
    442 {
    443 	struct exec *execp = epp->ep_hdr;
    444 	long dsize, bsize, baddr;
    445 
    446 	epp->ep_taddr = LINUX_N_TXTADDR(*execp, OMAGIC);
    447 	epp->ep_tsize = execp->a_text;
    448 	epp->ep_daddr = LINUX_N_DATADDR(*execp, OMAGIC);
    449 	epp->ep_dsize = execp->a_data + execp->a_bss;
    450 	epp->ep_entry = execp->a_entry;
    451 
    452 	/* set up command for text and data segments */
    453 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn,
    454 	    execp->a_text + execp->a_data, epp->ep_taddr, epp->ep_vp,
    455 	    LINUX_N_TXTOFF(*execp, OMAGIC), VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
    456 
    457 	/* set up command for bss segment */
    458 	baddr = roundup(epp->ep_daddr + execp->a_data, NBPG);
    459 	bsize = epp->ep_daddr + epp->ep_dsize - baddr;
    460 	if (bsize > 0)
    461 		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, bsize, baddr,
    462 		    NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
    463 
    464 	/*
    465 	 * Make sure (# of pages) mapped above equals (vm_tsize + vm_dsize);
    466 	 * obreak(2) relies on this fact. Both `vm_tsize' and `vm_dsize' are
    467 	 * computed (in execve(2)) by rounding *up* `ep_tsize' and `ep_dsize'
    468 	 * respectively to page boundaries.
    469 	 * Compensate `ep_dsize' for the amount of data covered by the last
    470 	 * text page.
    471 	 */
    472 	dsize = epp->ep_dsize + execp->a_text - roundup(execp->a_text, NBPG);
    473 	epp->ep_dsize = (dsize > 0) ? dsize : 0;
    474 	return exec_aout_setup_stack(p, epp);
    475 }
    476 
    477 int
    478 exec_linux_aout_prep_qmagic(p, epp)
    479 	struct proc *p;
    480 	struct exec_package *epp;
    481 {
    482 	struct exec *execp = epp->ep_hdr;
    483 
    484 	epp->ep_taddr = LINUX_N_TXTADDR(*execp, QMAGIC);
    485 	epp->ep_tsize = execp->a_text;
    486 	epp->ep_daddr = LINUX_N_DATADDR(*execp, QMAGIC);
    487 	epp->ep_dsize = execp->a_data + execp->a_bss;
    488 	epp->ep_entry = execp->a_entry;
    489 
    490 	/*
    491 	 * check if vnode is in open for writing, because we want to
    492 	 * demand-page out of it.  if it is, don't do it, for various
    493 	 * reasons
    494 	 */
    495 	if ((execp->a_text != 0 || execp->a_data != 0) &&
    496 	    epp->ep_vp->v_writecount != 0) {
    497 #ifdef DIAGNOSTIC
    498 		if (epp->ep_vp->v_flag & VTEXT)
    499 			panic("exec: a VTEXT vnode has writecount != 0\n");
    500 #endif
    501 		return ETXTBSY;
    502 	}
    503 	epp->ep_vp->v_flag |= VTEXT;
    504 
    505 	/* set up command for text segment */
    506 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_pagedvn, execp->a_text,
    507 	    epp->ep_taddr, epp->ep_vp, LINUX_N_TXTOFF(*execp, QMAGIC),
    508 	    VM_PROT_READ|VM_PROT_EXECUTE);
    509 
    510 	/* set up command for data segment */
    511 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_pagedvn, execp->a_data,
    512 	    epp->ep_daddr, epp->ep_vp, LINUX_N_DATOFF(*execp, QMAGIC),
    513 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
    514 
    515 	/* set up command for bss segment */
    516 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, execp->a_bss,
    517 	    epp->ep_daddr + execp->a_data, NULLVP, 0,
    518 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
    519 
    520 	return exec_aout_setup_stack(p, epp);
    521 }
    522 
    523 /*
    524  * linux_elf_check_header():
    525  *
    526  * Check header for validity; return 0 of ok ENOEXEC if error
    527  */
    528 static int
    529 linux_elf_check_header(eh, type)
    530 	Elf32_Ehdr     *eh;
    531 	int             type;
    532 {
    533 #ifdef sparc
    534   /* #$%@#$%@#$%! */
    535 # define memcmp bcmp
    536 #endif
    537 	if (memcmp(eh->e_ident, Elf32_e_ident, Elf32_e_siz) != 0) {
    538 		DPRINTF(("Not an elf file\n"));
    539 		return ENOEXEC;
    540 	}
    541 
    542 	switch (eh->e_machine) {
    543 #ifdef i386
    544 	case Elf32_em_386:
    545 	case Elf32_em_486:
    546 #endif
    547 #ifdef sparc
    548 	case Elf32_em_sparc:
    549 #endif
    550 		break;
    551 
    552 	default:
    553 		DPRINTF(("Unsupported elf machine type %d\n", eh->e_machine));
    554 		return ENOEXEC;
    555 	}
    556 
    557 	if (eh->e_type != type) {
    558 		DPRINTF(("Not an elf executable\n"));
    559 		return ENOEXEC;
    560 	}
    561 
    562 	return 0;
    563 }
    564 
    565 
    566 /*
    567  * linux_elf_load_psection():
    568  *
    569  * Load a psection at the appropriate address
    570  */
    571 static void
    572 linux_elf_load_psection(vcset, vp, ph, addr, size, prot)
    573 	struct exec_vmcmd_set   *vcset;
    574 	struct vnode		*vp;
    575 	Elf32_Phdr		*ph;
    576 	u_long			*addr;
    577 	u_long			*size;
    578 	int			*prot;
    579 {
    580 	u_long	uaddr;
    581 	long	diff;
    582 	long	offset;
    583 	u_long	msize;
    584 
    585 	/*
    586          * If the user specified an address, then we load there.
    587          */
    588 	if (*addr != ~0) {
    589 		uaddr = *addr + ph->p_align;
    590 		*addr = LINUX_ELF_ALIGN(uaddr, ph->p_align);
    591 		uaddr = LINUX_ELF_ALIGN(ph->p_vaddr, ph->p_align);
    592 		diff = ph->p_vaddr - uaddr;
    593 	} else {
    594 		uaddr = ph->p_vaddr;
    595 		*addr = LINUX_ELF_ALIGN(uaddr, ph->p_align);
    596 		diff = uaddr - *addr;
    597 	}
    598 
    599 	*prot |= (ph->p_flags & Elf32_pf_r) ? VM_PROT_READ : 0;
    600 	*prot |= (ph->p_flags & Elf32_pf_w) ? VM_PROT_WRITE : 0;
    601 	*prot |= (ph->p_flags & Elf32_pf_x) ? VM_PROT_EXECUTE : 0;
    602 
    603 	offset = ph->p_offset - diff;
    604 	*size = ph->p_filesz + diff;
    605 	msize = ph->p_memsz + diff;
    606 
    607 	DPRINTF(("Elf Seg@ 0x%x/0x%x sz %d/%d off 0x%x/0x%x[%d] algn 0x%x\n",
    608 		 ph->p_vaddr, *addr, *size, msize, ph->p_offset, offset,
    609 		 diff, ph->p_align));
    610 
    611 	NEW_VMCMD(vcset, vmcmd_map_readvn, *size,
    612 		  *addr, vp, offset, *prot);
    613 
    614 	/*
    615          * Check if we need to extend the size of the segment
    616          */
    617 	{
    618 		u_long	rm = round_page(*addr + msize);
    619 		u_long	rf = round_page(*addr + *size);
    620 		if (rm != rf) {
    621 			DPRINTF(("zeropad 0x%x-0x%x\n", rf, rm));
    622 			NEW_VMCMD(vcset, vmcmd_map_zero, rm - rf,
    623 				  rf, NULLVP, 0, *prot);
    624 			*size = msize;
    625 		}
    626 	}
    627 }
    628 
    629 
    630 /*
    631  * linux_elf_set_segment():
    632  *
    633  * Decide if the segment is text or data, depending on the protection
    634  * and set it appropriately
    635  */
    636 static int
    637 linux_elf_set_segment(epp, vaddr, size, prot)
    638 	struct exec_package	*epp;
    639 	u_long			 vaddr;
    640 	u_long			 size;
    641 	int			 prot;
    642 {
    643 	/*
    644          * Kludge: Unfortunately the current implementation of
    645          * exec package assumes a single text and data segment.
    646          * In Elf we can have more, but here we limit ourselves
    647          * to two and hope :-(
    648          * We also assume that the text is r-x, and data is rwx.
    649          */
    650 	switch (prot) {
    651 	case (VM_PROT_READ | VM_PROT_EXECUTE):
    652 		if (epp->ep_tsize != ~0) {
    653 			DPRINTF(("More than one text segment\n"));
    654 			return ENOEXEC;
    655 		}
    656 		epp->ep_taddr = vaddr;
    657 		epp->ep_tsize = size;
    658 		DPRINTF(("Elf Text@ 0x%x, size %d\n", vaddr, size));
    659 		break;
    660 
    661 	case (VM_PROT_READ | VM_PROT_WRITE):
    662 	case (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE):
    663 		if (epp->ep_dsize != ~0) {
    664 			DPRINTF(("More than one data segment\n"));
    665 			return ENOEXEC;
    666 		}
    667 		epp->ep_daddr = vaddr;
    668 		epp->ep_dsize = size;
    669 
    670 		DPRINTF(("Elf Data@ 0x%x, size %d\n", vaddr, size));
    671 		break;
    672 
    673 	default:
    674 		DPRINTF(("Bad protection 0%o\n", prot));
    675 		return ENOEXEC;
    676 	}
    677 	return 0;
    678 }
    679 
    680 
    681 /*
    682  * linux_elf_read_from():
    683  *
    684  *	Read from vnode into buffer at offset.
    685  */
    686 static int
    687 linux_elf_read_from(vp, off, p, buf, size)
    688 	struct vnode	*vp;
    689 	u_long		 off;
    690 	struct proc	*p;
    691 	caddr_t		 buf;
    692 	int		 size;
    693 {
    694 	int	error;
    695 	int	resid;
    696 
    697 	DPRINTF(("read from 0x%x to 0x%x size %d\n",
    698 		 off, buf, size));
    699 	if ((error = vn_rdwr(UIO_READ, vp, buf, size,
    700 			     off, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred,
    701 			     &resid, p)) != 0) {
    702 		DPRINTF(("Bad read error %d\n", error));
    703 		return error;
    704 	}
    705 	/*
    706          * See if we got all of it
    707          */
    708 	if (resid != 0) {
    709 		DPRINTF(("Incomplete read for header ask=%d, rem=%d\n",
    710 			 size, resid));
    711 		return error;
    712 	}
    713 	return 0;
    714 }
    715 
    716 
    717 /*
    718  * linux_elf_load_file():
    719  *
    720  * Load a file (interpreter/library) pointed to by path
    721  * [stolen from coff_load_shlib()]. Made slightly more generic than
    722  * the svr4 version, for possible later use in linux_uselib().
    723  */
    724 static int
    725 linux_elf_load_file(p, path, vcset, entry, ap, last)
    726 	struct proc		*p;
    727 	char			*path;
    728 	struct exec_vmcmd_set   *vcset;
    729 	u_long			*entry;
    730 	struct elf_args		*ap;
    731 	u_long			*last;
    732 {
    733 	int			 error, i;
    734 	struct nameidata	 nd;
    735 	Elf32_Ehdr		 eh;
    736 	Elf32_Phdr		*ph = NULL;
    737 	u_long			 phsize;
    738 	char			*bp = NULL;
    739 	u_long			 addr = *last;
    740 
    741 	DPRINTF(("Loading file %s @ %x\n", path, addr));
    742 
    743 	if ((error = linux_emul_find(p, NULL, linux_emul_path, path, &bp, 0)) != 0)
    744 		bp = NULL;
    745 	else
    746 		path = bp;
    747 	/*
    748          * 1. open file
    749          * 2. read filehdr
    750          * 3. map text, data, and bss out of it using VM_*
    751          */
    752 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, p);
    753 	/* first get the vnode */
    754 	if ((error = namei(&nd)) != 0) {
    755 		if (bp != NULL)
    756 			free((char *) bp, M_TEMP);
    757 		return error;
    758 	}
    759 	if ((error = linux_elf_read_from(nd.ni_vp, 0, p, (caddr_t) &eh,
    760 				    sizeof(eh))) != 0)
    761 		goto bad;
    762 
    763 #ifdef DEBUG_EXEC_LINUX_ELF
    764 	print_Ehdr(&eh);
    765 #endif
    766 
    767 	if ((error = linux_elf_check_header(&eh, Elf32_et_dyn)) != 0)
    768 		goto bad;
    769 
    770 	phsize = eh.e_phnum * sizeof(Elf32_Phdr);
    771 	ph = (Elf32_Phdr *) malloc(phsize, M_TEMP, M_WAITOK);
    772 
    773 	if ((error = linux_elf_read_from(nd.ni_vp, eh.e_phoff, p,
    774 				    (caddr_t) ph, phsize)) != 0)
    775 		goto bad;
    776 
    777 	/*
    778          * Load all the necessary sections
    779          */
    780 	for (i = 0; i < eh.e_phnum; i++) {
    781 		u_long	size = 0;
    782 		int	prot = 0;
    783 #ifdef DEBUG_EXEC_LINUX_ELF
    784 		print_Phdr(&ph[i]);
    785 #endif
    786 
    787 		switch (ph[i].p_type) {
    788 		case Elf32_pt_load:
    789 			linux_elf_load_psection(vcset, nd.ni_vp, &ph[i], &addr,
    790 						&size, &prot);
    791 			/* Assume that the text segment is r-x only */
    792 			if ((prot & PROT_WRITE) == 0) {
    793 				*entry = addr + eh.e_entry;
    794 				ap->arg_interp = addr;
    795 				DPRINTF(("Interpreter@ 0x%x\n", addr));
    796 			}
    797 			addr += size;
    798 			break;
    799 
    800 		case Elf32_pt_dynamic:
    801 		case Elf32_pt_phdr:
    802 		case Elf32_pt_note:
    803 			break;
    804 
    805 		default:
    806 			DPRINTF(("interp: Unexpected program header type %d\n",
    807 				 ph[i].p_type));
    808 			break;
    809 		}
    810 	}
    811 
    812 bad:
    813 	if (ph != NULL)
    814 		free((char *) ph, M_TEMP);
    815 	if (bp != NULL)
    816 		free((char *) bp, M_TEMP);
    817 
    818 	*last = addr;
    819 	vrele(nd.ni_vp);
    820 	return error;
    821 }
    822 
    823 
    824 /*
    825  * exec_linux_elf_makecmds(): Prepare an Elf binary's exec package
    826  *
    827  * First, set of the various offsets/lengths in the exec package.
    828  *
    829  * Then, mark the text image busy (so it can be demand paged) or error
    830  * out if this is not possible.  Finally, set up vmcmds for the
    831  * text, data, bss, and stack segments.
    832  */
    833 int
    834 exec_linux_elf_makecmds(p, epp)
    835 	struct proc		*p;
    836 	struct exec_package	*epp;
    837 {
    838 	Elf32_Ehdr     *eh = epp->ep_hdr;
    839 	Elf32_Phdr     *ph, *pp;
    840 	int             error;
    841 	int             i;
    842 	char            interp[MAXPATHLEN];
    843 	u_long          pos = 0;
    844 	u_long          phsize;
    845 
    846 #ifdef DEBUG_EXEC_LINUX_ELF
    847 	print_Ehdr(eh);
    848 #endif
    849 	if (epp->ep_hdrvalid < sizeof(Elf32_Ehdr))
    850 		return ENOEXEC;
    851 
    852 	if (linux_elf_check_header(eh, Elf32_et_exec))
    853 		return ENOEXEC;
    854 
    855 	/*
    856          * check if vnode is in open for writing, because we want to
    857          * demand-page out of it.  if it is, don't do it, for various
    858          * reasons
    859          */
    860 	if (epp->ep_vp->v_writecount != 0) {
    861 #ifdef DIAGNOSTIC
    862 		if (epp->ep_vp->v_flag & VTEXT)
    863 			panic("exec: a VTEXT vnode has writecount != 0\n");
    864 #endif
    865 		return ETXTBSY;
    866 	}
    867 	/*
    868          * Allocate space to hold all the program headers, and read them
    869          * from the file
    870          */
    871 	phsize = eh->e_phnum * sizeof(Elf32_Phdr);
    872 	ph = (Elf32_Phdr *) malloc(phsize, M_TEMP, M_WAITOK);
    873 
    874 	if ((error = linux_elf_read_from(epp->ep_vp, eh->e_phoff, p,
    875 				    (caddr_t) ph, phsize)) != 0)
    876 		goto bad;
    877 
    878 	epp->ep_tsize = ~0;
    879 	epp->ep_dsize = ~0;
    880 
    881 	interp[0] = '\0';
    882 
    883 	/*
    884          * Load all the necessary sections
    885          */
    886 	for (i = 0; i < eh->e_phnum; i++) {
    887 		u_long          addr = ~0, size = 0;
    888 		int             prot = 0;
    889 
    890 		pp = &ph[i];
    891 #ifdef DEBUG_EXEC_LINUX_ELF
    892 		print_Phdr(pp);
    893 #endif
    894 
    895 		switch (ph[i].p_type) {
    896 		case Elf32_pt_load:
    897 			linux_elf_load_psection(&epp->ep_vmcmds, epp->ep_vp,
    898 				&ph[i], &addr, &size, &prot);
    899 			if ((error = linux_elf_set_segment(epp, addr, size,
    900 						      prot)) != 0)
    901 				goto bad;
    902 			break;
    903 
    904 		case Elf32_pt_shlib:
    905 			DPRINTF(("No support for COFF libraries (yet)\n"));
    906 			error = ENOEXEC;
    907 			goto bad;
    908 
    909 		case Elf32_pt_interp:
    910 			if (pp->p_filesz >= sizeof(interp)) {
    911 				DPRINTF(("Interpreter path too long %d\n",
    912 					 pp->p_filesz));
    913 				goto bad;
    914 			}
    915 			if ((error = linux_elf_read_from(epp->ep_vp, pp->p_offset, p,
    916 				      (caddr_t) interp, pp->p_filesz)) != 0)
    917 				goto bad;
    918 			break;
    919 
    920 		case Elf32_pt_dynamic:
    921 		case Elf32_pt_phdr:
    922 		case Elf32_pt_note:
    923 			break;
    924 
    925 		default:
    926 			/*
    927 			 * Not fatal, we don't need to understand everything
    928 			 * :-)
    929 			 */
    930 			DPRINTF(("Unsupported program header type %d\n",
    931 				 pp->p_type));
    932 			break;
    933 		}
    934 	}
    935 
    936 	/*
    937          * Check if we found a dynamically linked binary and arrange to load
    938          * it's interpreter
    939          */
    940 	if (interp[0]) {
    941 		struct elf_args *ap;
    942 		pos = ~0;
    943 
    944 		ap = (struct elf_args *) malloc(sizeof(struct elf_args),
    945 						 M_TEMP, M_WAITOK);
    946 		if ((error = linux_elf_load_file(p, interp, &epp->ep_vmcmds,
    947 				&epp->ep_entry, ap, &pos)) != 0) {
    948 			free((char *) ap, M_TEMP);
    949 			goto bad;
    950 		}
    951 		/* Arrange to load the program headers. */
    952 		pos = LINUX_ELF_ALIGN(pos + NBPG, NBPG);
    953 		DPRINTF(("Program header @0x%x\n", pos));
    954 		ap->arg_phaddr = pos;
    955 		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, phsize,
    956 			  pos, epp->ep_vp, eh->e_phoff,
    957 			  VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE);
    958 		pos += phsize;
    959 
    960 		ap->arg_phentsize = eh->e_phentsize;
    961 		ap->arg_phnum = eh->e_phnum;
    962 		ap->arg_entry = eh->e_entry;
    963 
    964 		epp->ep_emul_arg = ap;
    965 	} else
    966 		epp->ep_entry = eh->e_entry;
    967 
    968 	DPRINTF(("taddr 0x%x tsize 0x%x daddr 0x%x dsize 0x%x\n",
    969 	       epp->ep_taddr, epp->ep_tsize, epp->ep_daddr, epp->ep_dsize));
    970 
    971 	free((char *) ph, M_TEMP);
    972 
    973 	DPRINTF(("Elf entry@ 0x%x\n", epp->ep_entry));
    974 	epp->ep_vp->v_flag |= VTEXT;
    975 
    976 	epp->ep_emul = &emul_linux_elf;
    977 
    978 	return exec_aout_setup_stack(p, epp);
    979 
    980 bad:
    981 	free((char *) ph, M_TEMP);
    982 	kill_vmcmds(&epp->ep_vmcmds);
    983 	return ENOEXEC;
    984 }
    985 /*
    986  * The Linux system call to load shared libraries, a.out version. The
    987  * a.out shared libs are just files that are mapped onto a fixed
    988  * address in the process' address space. The address is given in
    989  * a_entry. Read in the header, set up some VM commands and run them.
    990  *
    991  * Yes, both text and data are mapped at once, so we're left with
    992  * writeable text for the shared libs. The Linux crt0 seemed to break
    993  * sometimes when data was mapped seperately. It munmapped a uselib()
    994  * of ld.so by hand, which failed with shared text and data for ld.so
    995  * Yuck.
    996  *
    997  * Because of the problem with ZMAGIC executables (text starts
    998  * at 0x400 in the file, but needs to be mapped at 0), ZMAGIC
    999  * shared libs are not handled very efficiently :-(
   1000  */
   1001 
   1002 int
   1003 linux_uselib(p, uap, retval)
   1004 	struct proc *p;
   1005 	struct linux_uselib_args /* {
   1006 		syscallarg(char *) path;
   1007 	} */ *uap;
   1008 	register_t *retval;
   1009 {
   1010 	caddr_t sg;
   1011 	long bsize, dsize, tsize, taddr, baddr, daddr;
   1012 	struct nameidata ni;
   1013 	struct vnode *vp;
   1014 	struct exec hdr;
   1015 	struct exec_vmcmd_set vcset;
   1016 	int rem, i, magic, error;
   1017 
   1018 	sg = stackgap_init();
   1019 	CHECK_ALT_EXIST(p, &sg, SCARG(uap, path));
   1020 
   1021 	NDINIT(&ni, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
   1022 
   1023 	if ((error = namei(&ni)))
   1024 		return error;
   1025 
   1026 	vp = ni.ni_vp;
   1027 
   1028 	if ((error = vn_rdwr(UIO_READ, vp, (caddr_t) &hdr, LINUX_AOUT_HDR_SIZE,
   1029 			     0, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred,
   1030 			     &rem, p))) {
   1031 		vrele(vp);
   1032 		return error;
   1033 	}
   1034 
   1035 	if (rem != 0) {
   1036 		vrele(vp);
   1037 		return ENOEXEC;
   1038 	}
   1039 
   1040 	if (LINUX_N_MACHTYPE(&hdr) != LINUX_MID_MACHINE)
   1041 		return ENOEXEC;
   1042 
   1043 	magic = LINUX_N_MAGIC(&hdr);
   1044 	taddr = hdr.a_entry & (~(NBPG - 1));
   1045 	tsize = hdr.a_text;
   1046 	daddr = taddr + tsize;
   1047 	dsize = hdr.a_data + hdr.a_bss;
   1048 
   1049 	if ((hdr.a_text != 0 || hdr.a_data != 0) && vp->v_writecount != 0) {
   1050 		vrele(vp);
   1051                 return ETXTBSY;
   1052         }
   1053 	vp->v_flag |= VTEXT;
   1054 
   1055 	vcset.evs_cnt = 0;
   1056 	vcset.evs_used = 0;
   1057 
   1058 	NEW_VMCMD(&vcset,
   1059 		  magic == ZMAGIC ? vmcmd_map_readvn : vmcmd_map_pagedvn,
   1060 		  hdr.a_text + hdr.a_data, taddr,
   1061 		  vp, LINUX_N_TXTOFF(hdr, magic),
   1062 		  VM_PROT_READ|VM_PROT_EXECUTE|VM_PROT_WRITE);
   1063 
   1064 	baddr = roundup(daddr + hdr.a_data, NBPG);
   1065 	bsize = daddr + dsize - baddr;
   1066         if (bsize > 0) {
   1067                 NEW_VMCMD(&vcset, vmcmd_map_zero, bsize, baddr,
   1068                     NULLVP, 0, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
   1069 	}
   1070 
   1071 	for (i = 0; i < vcset.evs_used && !error; i++) {
   1072 		struct exec_vmcmd *vcp;
   1073 
   1074 		vcp = &vcset.evs_cmds[i];
   1075 		error = (*vcp->ev_proc)(p, vcp);
   1076 	}
   1077 
   1078 	kill_vmcmds(&vcset);
   1079 
   1080 	vrele(vp);
   1081 
   1082 	return error;
   1083 }
   1084 
   1085 /*
   1086  * Execve(2). Just check the alternate emulation path, and pass it on
   1087  * to the NetBSD execve().
   1088  */
   1089 int
   1090 linux_execve(p, uap, retval)
   1091 	struct proc *p;
   1092 	struct linux_execve_args /* {
   1093 		syscallarg(char *) path;
   1094 		syscallarg(char **) argv;
   1095 		syscallarg(char **) envp;
   1096 	} */ *uap;
   1097 	register_t *retval;
   1098 {
   1099 	caddr_t sg;
   1100 
   1101 	sg = stackgap_init();
   1102 	CHECK_ALT_EXIST(p, &sg, SCARG(uap, path));
   1103 
   1104 	return execve(p, uap, retval);
   1105 }
   1106