      1 /*	$NetBSD: exec_elf.c,v 1.37.2.3 2017/07/14 06:18:25 snj Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1994, 2000, 2005 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Christos Zoulas.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Copyright (c) 1996 Christopher G. Demetriou
     34  * All rights reserved.
     35  *
     36  * Redistribution and use in source and binary forms, with or without
     37  * modification, are permitted provided that the following conditions
     38  * are met:
     39  * 1. Redistributions of source code must retain the above copyright
     40  *    notice, this list of conditions and the following disclaimer.
     41  * 2. Redistributions in binary form must reproduce the above copyright
     42  *    notice, this list of conditions and the following disclaimer in the
     43  *    documentation and/or other materials provided with the distribution.
     44  * 3. The name of the author may not be used to endorse or promote products
     45  *    derived from this software without specific prior written permission
     46  *
     47  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     48  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     49  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     50  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     51  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     52  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     53  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     54  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     55  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     56  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     57  */
     58 
     59 #include <sys/cdefs.h>
     60 __KERNEL_RCSID(1, "$NetBSD: exec_elf.c,v 1.37.2.3 2017/07/14 06:18:25 snj Exp $");
     61 
     62 #ifdef _KERNEL_OPT
     63 #include "opt_pax.h"
     64 #endif /* _KERNEL_OPT */
     65 
     66 #include <sys/param.h>
     67 #include <sys/proc.h>
     68 #include <sys/kmem.h>
     69 #include <sys/namei.h>
     70 #include <sys/vnode.h>
     71 #include <sys/exec.h>
     72 #include <sys/exec_elf.h>
     73 #include <sys/syscall.h>
     74 #include <sys/signalvar.h>
     75 #include <sys/mount.h>
     76 #include <sys/stat.h>
     77 #include <sys/kauth.h>
     78 #include <sys/bitops.h>
     79 #include <sys/cprng.h>
     80 
     81 #include <sys/cpu.h>
     82 #include <machine/reg.h>
     83 
     84 #include <compat/common/compat_util.h>
     85 
     86 #include <sys/pax.h>
     87 #include <uvm/uvm_param.h>
     88 
     89 extern struct emul emul_netbsd;
     90 
     91 #define elf_check_header	ELFNAME(check_header)
     92 #define elf_copyargs		ELFNAME(copyargs)
     93 #define elf_load_file		ELFNAME(load_file)
     94 #define elf_load_psection	ELFNAME(load_psection)
     95 #define exec_elf_makecmds	ELFNAME2(exec,makecmds)
     96 #define netbsd_elf_signature	ELFNAME2(netbsd,signature)
     97 #define netbsd_elf_note       	ELFNAME2(netbsd,note)
     98 #define netbsd_elf_probe	ELFNAME2(netbsd,probe)
     99 #define	coredump		ELFNAMEEND(coredump)
    100 #define	elf_free_emul_arg	ELFNAME(free_emul_arg)
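
/*
 * Note: this file is compiled once per ELF word size (via exec_elf32.c and
 * exec_elf64.c), and the ELFNAME*() macros from <sys/exec_elf.h> splice the
 * configured ELFSIZE into each name, so, for example, elf_load_file above is
 * expected to end up as elf32_load_file or elf64_load_file in the kernel.
 */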
    101 
    102 int	elf_load_file(struct lwp *, struct exec_package *, char *,
    103 	    struct exec_vmcmd_set *, u_long *, struct elf_args *, Elf_Addr *);
    104 void	elf_load_psection(struct exec_vmcmd_set *, struct vnode *,
    105 	    const Elf_Phdr *, Elf_Addr *, u_long *, int *, int);
    106 
    107 int	netbsd_elf_signature(struct lwp *, struct exec_package *, Elf_Ehdr *);
    108 int	netbsd_elf_note(struct exec_package *, const Elf_Nhdr *, const char *,
    109 	    const char *);
    110 int	netbsd_elf_probe(struct lwp *, struct exec_package *, void *, char *,
    111 	    vaddr_t *);
    112 
    113 static void	elf_free_emul_arg(void *);
    114 
     115 /* Round up and down to the given (power-of-two) boundary. */
    116 #define	ELF_ROUND(a, b)		(((a) + (b) - 1) & ~((b) - 1))
    117 #define	ELF_TRUNC(a, b)		((a) & ~((b) - 1))
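
/*
 * Both macros assume "b" is a power of two.  Illustrative example:
 *	ELF_ROUND(0x1234, 0x1000) == 0x2000
 *	ELF_TRUNC(0x1234, 0x1000) == 0x1000
 */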
    118 
    119 /*
     120  * Arbitrary limits to avoid DoS via excessive memory allocation.
    121  */
    122 #define MAXPHNUM	128
    123 #define MAXSHNUM	32768
    124 #define MAXNOTESIZE	1024
    125 
    126 static void
    127 elf_placedynexec(struct lwp *l, struct exec_package *epp, Elf_Ehdr *eh,
    128     Elf_Phdr *ph)
    129 {
    130 	Elf_Addr align, offset;
    131 	int i;
    132 
    133 	for (align = i = 0; i < eh->e_phnum; i++)
    134 		if (ph[i].p_type == PT_LOAD && ph[i].p_align > align)
    135 			align = ph[i].p_align;
    136 
    137 #ifdef PAX_ASLR
    138 	if (pax_aslr_active(l)) {
    139 		size_t pax_align, l2, delta;
    140 		uint32_t r;
    141 
    142 		pax_align = align;
    143 
    144 		r = cprng_fast32();
    145 
    146 		if (pax_align == 0)
    147 			pax_align = PGSHIFT;
    148 		l2 = ilog2(pax_align);
    149 		delta = PAX_ASLR_DELTA(r, l2, PAX_ASLR_DELTA_EXEC_LEN);
    150 		offset = ELF_TRUNC(delta, pax_align) + PAGE_SIZE;
    151 #ifdef PAX_ASLR_DEBUG
    152 		uprintf("r=0x%x l2=0x%zx PGSHIFT=0x%x Delta=0x%zx\n", r, l2,
    153 		    PGSHIFT, delta);
    154 		uprintf("pax offset=0x%llx entry=0x%llx\n",
    155 		    (unsigned long long)offset,
    156 		    (unsigned long long)eh->e_entry);
    157 #endif /* PAX_ASLR_DEBUG */
    158 	} else
    159 #endif /* PAX_ASLR */
    160 		offset = MAX(align, PAGE_SIZE);
    161 
    162 	offset += epp->ep_vm_minaddr;
    163 
    164 	for (i = 0; i < eh->e_phnum; i++)
    165 		ph[i].p_vaddr += offset;
    166 	eh->e_entry += offset;
    167 }
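
/*
 * In other words, elf_placedynexec() slides the whole dynamic executable by
 * one offset: every PT_LOAD p_vaddr and the entry point move together, so
 * the relative layout of the psections is preserved.  Without PAX_ASLR the
 * offset is MAX(largest p_align, PAGE_SIZE) above ep_vm_minaddr; with ASLR
 * it is instead a random delta truncated to that alignment, plus one page.
 */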
    168 
    169 /*
    170  * Copy arguments onto the stack in the normal way, but add some
    171  * extra information in case of dynamic binding.
    172  */
    173 int
    174 elf_copyargs(struct lwp *l, struct exec_package *pack,
    175     struct ps_strings *arginfo, char **stackp, void *argp)
    176 {
    177 	size_t len, vlen;
    178 	AuxInfo ai[ELF_AUX_ENTRIES], *a, *execname;
    179 	struct elf_args *ap;
    180 	int error;
    181 
    182 	if ((error = copyargs(l, pack, arginfo, stackp, argp)) != 0)
    183 		return error;
    184 
    185 	a = ai;
    186 	execname = NULL;
    187 
    188 	/*
     189 	 * Push the extra arguments needed by dynamically linked
     190 	 * binaries onto the stack.
    191 	 */
    192 	if ((ap = (struct elf_args *)pack->ep_emul_arg)) {
    193 		struct vattr *vap = pack->ep_vap;
    194 
    195 		a->a_type = AT_PHDR;
    196 		a->a_v = ap->arg_phaddr;
    197 		a++;
    198 
    199 		a->a_type = AT_PHENT;
    200 		a->a_v = ap->arg_phentsize;
    201 		a++;
    202 
    203 		a->a_type = AT_PHNUM;
    204 		a->a_v = ap->arg_phnum;
    205 		a++;
    206 
    207 		a->a_type = AT_PAGESZ;
    208 		a->a_v = PAGE_SIZE;
    209 		a++;
    210 
    211 		a->a_type = AT_BASE;
    212 		a->a_v = ap->arg_interp;
    213 		a++;
    214 
    215 		a->a_type = AT_FLAGS;
    216 		a->a_v = 0;
    217 		a++;
    218 
    219 		a->a_type = AT_ENTRY;
    220 		a->a_v = ap->arg_entry;
    221 		a++;
    222 
    223 		a->a_type = AT_EUID;
    224 		if (vap->va_mode & S_ISUID)
    225 			a->a_v = vap->va_uid;
    226 		else
    227 			a->a_v = kauth_cred_geteuid(l->l_cred);
    228 		a++;
    229 
    230 		a->a_type = AT_RUID;
    231 		a->a_v = kauth_cred_getuid(l->l_cred);
    232 		a++;
    233 
    234 		a->a_type = AT_EGID;
    235 		if (vap->va_mode & S_ISGID)
    236 			a->a_v = vap->va_gid;
    237 		else
    238 			a->a_v = kauth_cred_getegid(l->l_cred);
    239 		a++;
    240 
    241 		a->a_type = AT_RGID;
    242 		a->a_v = kauth_cred_getgid(l->l_cred);
    243 		a++;
    244 
    245 		a->a_type = AT_STACKBASE;
    246 		a->a_v = l->l_proc->p_stackbase;
    247 		a++;
    248 
    249 		if (pack->ep_path) {
    250 			execname = a;
    251 			a->a_type = AT_SUN_EXECNAME;
    252 			a++;
    253 		}
    254 
    255 		exec_free_emul_arg(pack);
    256 	}
    257 
    258 	a->a_type = AT_NULL;
    259 	a->a_v = 0;
    260 	a++;
    261 
    262 	vlen = (a - ai) * sizeof(ai[0]);
    263 
    264 	KASSERT(vlen <= sizeof(ai));
    265 
    266 	if (execname) {
    267 		char *path = pack->ep_path;
    268 		execname->a_v = (uintptr_t)(*stackp + vlen);
    269 		len = strlen(path) + 1;
    270 		if ((error = copyout(path, (*stackp + vlen), len)) != 0)
    271 			return error;
    272 		len = ALIGN(len);
    273 	} else
    274 		len = 0;
    275 
    276 	if ((error = copyout(ai, *stackp, vlen)) != 0)
    277 		return error;
    278 	*stackp += vlen + len;
    279 
    280 	return 0;
    281 }
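
/*
 * Sketch of what elf_copyargs() leaves at *stackp for a dynamically linked
 * binary: an array of AuxInfo { a_type, a_v } pairs
 *
 *	AT_PHDR  AT_PHENT  AT_PHNUM  AT_PAGESZ  AT_BASE  AT_FLAGS  AT_ENTRY
 *	AT_EUID  AT_RUID   AT_EGID   AT_RGID    AT_STACKBASE
 *	[AT_SUN_EXECNAME]  AT_NULL
 *
 * followed, when ep_path is set, by the NUL-terminated executable path that
 * AT_SUN_EXECNAME points back at.
 */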
    282 
    283 /*
    284  * elf_check_header():
    285  *
     286  * Check the header for validity; return 0 if ok, ENOEXEC on error.
    287  */
    288 int
    289 elf_check_header(Elf_Ehdr *eh, int type)
    290 {
    291 
    292 	if (memcmp(eh->e_ident, ELFMAG, SELFMAG) != 0 ||
    293 	    eh->e_ident[EI_CLASS] != ELFCLASS)
    294 		return ENOEXEC;
    295 
    296 	switch (eh->e_machine) {
    297 
    298 	ELFDEFNNAME(MACHDEP_ID_CASES)
    299 
    300 	default:
    301 		return ENOEXEC;
    302 	}
    303 
    304 	if (ELF_EHDR_FLAGS_OK(eh) == 0)
    305 		return ENOEXEC;
    306 
    307 	if (eh->e_type != type)
    308 		return ENOEXEC;
    309 
    310 	if (eh->e_shnum > MAXSHNUM || eh->e_phnum > MAXPHNUM)
    311 		return ENOEXEC;
    312 
    313 	return 0;
    314 }
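
/*
 * To pass the checks above a header needs: the ELF magic, an EI_CLASS equal
 * to the kernel's ELFCLASS, an e_machine listed in the machine-dependent
 * MACHDEP_ID_CASES table, e_flags acceptable to ELF_EHDR_FLAGS_OK(), the
 * requested e_type (ET_EXEC or ET_DYN), and section/program header counts
 * within MAXSHNUM/MAXPHNUM.
 */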
    315 
    316 /*
    317  * elf_load_psection():
    318  *
    319  * Load a psection at the appropriate address
    320  */
    321 void
    322 elf_load_psection(struct exec_vmcmd_set *vcset, struct vnode *vp,
    323     const Elf_Phdr *ph, Elf_Addr *addr, u_long *size, int *prot, int flags)
    324 {
    325 	u_long msize, psize, rm, rf;
    326 	long diff, offset;
    327 
    328 	/*
     329 	 * If the caller did not specify an address, use the psection's p_vaddr.
    330 	 */
    331 	if (*addr == ELFDEFNNAME(NO_ADDR))
    332 		*addr = ph->p_vaddr;
    333 
    334 	if (ph->p_align > 1) {
    335 		/*
    336 		 * Make sure we are virtually aligned as we are supposed to be.
    337 		 */
    338 		diff = ph->p_vaddr - ELF_TRUNC(ph->p_vaddr, ph->p_align);
    339 		KASSERT(*addr - diff == ELF_TRUNC(*addr, ph->p_align));
    340 		/*
    341 		 * But make sure to not map any pages before the start of the
    342 		 * psection by limiting the difference to within a page.
    343 		 */
    344 		diff &= PAGE_MASK;
    345 	} else
    346 		diff = 0;
    347 
    348 	*prot |= (ph->p_flags & PF_R) ? VM_PROT_READ : 0;
    349 	*prot |= (ph->p_flags & PF_W) ? VM_PROT_WRITE : 0;
    350 	*prot |= (ph->p_flags & PF_X) ? VM_PROT_EXECUTE : 0;
    351 
    352 	/*
    353 	 * Adjust everything so it all starts on a page boundary.
    354 	 */
    355 	*addr -= diff;
    356 	offset = ph->p_offset - diff;
    357 	*size = ph->p_filesz + diff;
    358 	msize = ph->p_memsz + diff;
    359 
    360 	if (ph->p_align >= PAGE_SIZE) {
    361 		if ((ph->p_flags & PF_W) != 0) {
    362 			/*
     363 			 * Because the pagedvn pager can't handle zero-fill
     364 			 * of the last data page if it's not page aligned,
     365 			 * we map the last page readvn.
    366 			 */
    367 			psize = trunc_page(*size);
    368 		} else {
    369 			psize = round_page(*size);
    370 		}
    371 	} else {
    372 		psize = *size;
    373 	}
    374 
    375 	if (psize > 0) {
    376 		NEW_VMCMD2(vcset, ph->p_align < PAGE_SIZE ?
    377 		    vmcmd_map_readvn : vmcmd_map_pagedvn, psize, *addr, vp,
    378 		    offset, *prot, flags);
    379 		flags &= VMCMD_RELATIVE;
    380 	}
    381 	if (psize < *size) {
    382 		NEW_VMCMD2(vcset, vmcmd_map_readvn, *size - psize,
    383 		    *addr + psize, vp, offset + psize, *prot, flags);
    384 	}
    385 
    386 	/*
     387 	 * Check if we need to extend the size of the segment (does
     388 	 * the bss extend past the next page boundary?).
    389 	 */
    390 	rm = round_page(*addr + msize);
    391 	rf = round_page(*addr + *size);
    392 
    393 	if (rm != rf) {
    394 		NEW_VMCMD2(vcset, vmcmd_map_zero, rm - rf, rf, NULLVP,
    395 		    0, *prot, flags & VMCMD_RELATIVE);
    396 		*size = msize;
    397 	}
    398 }
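
/*
 * Worked example (a sketch, assuming PAGE_SIZE == 0x1000): a writable
 * PT_LOAD with p_vaddr 0x3000, p_offset 0x2000, p_filesz 0x1800,
 * p_memsz 0x5000 and p_align 0x1000 produces three vmcmds:
 *	pagedvn  0x1000 bytes at 0x3000 from file offset 0x2000
 *	readvn   0x0800 bytes at 0x4000 from file offset 0x3000
 *	zero     0x3000 bytes at 0x5000 (the bss beyond the file data)
 * and *size is grown from p_filesz to p_memsz for the caller.
 */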
    399 
    400 /*
    401  * elf_load_file():
    402  *
    403  * Load a file (interpreter/library) pointed to by path
    404  * [stolen from coff_load_shlib()]. Made slightly generic
    405  * so it might be used externally.
    406  */
    407 int
    408 elf_load_file(struct lwp *l, struct exec_package *epp, char *path,
    409     struct exec_vmcmd_set *vcset, u_long *entryoff, struct elf_args *ap,
    410     Elf_Addr *last)
    411 {
    412 	int error, i;
    413 	struct vnode *vp;
    414 	struct vattr attr;
    415 	Elf_Ehdr eh;
    416 	Elf_Phdr *ph = NULL;
    417 	const Elf_Phdr *ph0;
    418 	const Elf_Phdr *base_ph;
    419 	const Elf_Phdr *last_ph;
    420 	u_long phsize;
    421 	Elf_Addr addr = *last;
    422 	struct proc *p;
    423 	bool use_topdown;
    424 
    425 	p = l->l_proc;
    426 
    427 	KASSERT(p->p_vmspace);
    428 	if (__predict_true(p->p_vmspace != proc0.p_vmspace))
    429 		use_topdown = p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN;
    430 	else
    431 #ifdef __USING_TOPDOWN_VM
    432 		use_topdown = true;
    433 #else
    434 		use_topdown = false;
    435 #endif
    436 
    437 	/*
    438 	 * 1. open file
    439 	 * 2. read filehdr
    440 	 * 3. map text, data, and bss out of it using VM_*
    441 	 */
    442 	vp = epp->ep_interp;
    443 	if (vp == NULL) {
    444 		error = emul_find_interp(l, epp, path);
    445 		if (error != 0)
    446 			return error;
    447 		vp = epp->ep_interp;
    448 	}
    449 	/* We'll tidy this ourselves - otherwise we have locking issues */
    450 	epp->ep_interp = NULL;
    451 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    452 
    453 	/*
     454 	 * As with the main executable, if it's not a regular file or not
     455 	 * marked as executable, we don't allow it to be used.
    456 	 */
    457 	if (vp->v_type != VREG) {
    458 		error = EACCES;
    459 		goto badunlock;
    460 	}
    461 	if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0)
    462 		goto badunlock;
    463 
    464 	/* get attributes */
    465 	if ((error = VOP_GETATTR(vp, &attr, l->l_cred)) != 0)
    466 		goto badunlock;
    467 
    468 	/*
    469 	 * Check mount point.  Though we're not trying to exec this binary,
    470 	 * we will be executing code from it, so if the mount point
    471 	 * disallows execution or set-id-ness, we punt or kill the set-id.
    472 	 */
    473 	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
    474 		error = EACCES;
    475 		goto badunlock;
    476 	}
    477 	if (vp->v_mount->mnt_flag & MNT_NOSUID)
    478 		epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
    479 
    480 #ifdef notyet /* XXX cgd 960926 */
    481 	XXX cgd 960926: (maybe) VOP_OPEN it (and VOP_CLOSE in copyargs?)
    482 
    483 	XXXps: this problem will make it impossible to use an interpreter
    484 	from a file system which actually does something in VOP_OPEN
    485 #endif
    486 
    487 	error = vn_marktext(vp);
    488 	if (error)
    489 		goto badunlock;
    490 
    491 	VOP_UNLOCK(vp);
    492 
    493 	if ((error = exec_read_from(l, vp, 0, &eh, sizeof(eh))) != 0)
    494 		goto bad;
    495 
    496 	if ((error = elf_check_header(&eh, ET_DYN)) != 0)
    497 		goto bad;
    498 
    499 	if (eh.e_phnum > MAXPHNUM || eh.e_phnum == 0) {
    500 		error = ENOEXEC;
    501 		goto bad;
    502 	}
    503 
    504 	phsize = eh.e_phnum * sizeof(Elf_Phdr);
    505 	ph = kmem_alloc(phsize, KM_SLEEP);
    506 
    507 	if ((error = exec_read_from(l, vp, eh.e_phoff, ph, phsize)) != 0)
    508 		goto bad;
    509 
    510 #ifdef ELF_INTERP_NON_RELOCATABLE
    511 	/*
    512 	 * Evil hack:  Only MIPS should be non-relocatable, and the
    513 	 * psections should have a high address (typically 0x5ffe0000).
    514 	 * If it's now relocatable, it should be linked at 0 and the
    515 	 * psections should have zeros in the upper part of the address.
    516 	 * Otherwise, force the load at the linked address.
    517 	 */
    518 	if (*last == ELF_LINK_ADDR && (ph->p_vaddr & 0xffff0000) == 0)
    519 		*last = ELFDEFNNAME(NO_ADDR);
    520 #endif
    521 
    522 	/*
    523 	 * If no position to load the interpreter was set by a probe
    524 	 * function, pick the same address that a non-fixed mmap(0, ..)
    525 	 * would (i.e. something safely out of the way).
    526 	 */
    527 	if (*last == ELFDEFNNAME(NO_ADDR)) {
    528 		u_long limit = 0;
    529 		/*
     530 		 * Find the starting and ending addresses of the psections to
    531 		 * be loaded.  This will give us the size.
    532 		 */
    533 		for (i = 0, ph0 = ph, base_ph = NULL; i < eh.e_phnum;
    534 		     i++, ph0++) {
    535 			if (ph0->p_type == PT_LOAD) {
    536 				u_long psize = ph0->p_vaddr + ph0->p_memsz;
    537 				if (base_ph == NULL)
    538 					base_ph = ph0;
    539 				if (psize > limit)
    540 					limit = psize;
    541 			}
    542 		}
    543 
    544 		if (base_ph == NULL) {
    545 			error = ENOEXEC;
    546 			goto bad;
    547 		}
    548 
    549 		/*
    550 		 * Now compute the size and load address.
    551 		 */
    552 		addr = (*epp->ep_esch->es_emul->e_vm_default_addr)(p,
    553 		    epp->ep_daddr,
    554 		    round_page(limit) - trunc_page(base_ph->p_vaddr));
    555 	} else
    556 		addr = *last; /* may be ELF_LINK_ADDR */
    557 
    558 	/*
    559 	 * Load all the necessary sections
    560 	 */
    561 	for (i = 0, ph0 = ph, base_ph = NULL, last_ph = NULL;
    562 	     i < eh.e_phnum; i++, ph0++) {
    563 		switch (ph0->p_type) {
    564 		case PT_LOAD: {
    565 			u_long size;
    566 			int prot = 0;
    567 			int flags;
    568 
    569 			if (base_ph == NULL) {
    570 				/*
    571 				 * First encountered psection is always the
     572 				 * base psection.  Make sure it's aligned
     573 				 * properly (align down for topdown VM,
     574 				 * align up for bottom-up VM).
    575 				 */
    576 				base_ph = ph0;
    577 				flags = VMCMD_BASE;
    578 				if (addr == ELF_LINK_ADDR)
    579 					addr = ph0->p_vaddr;
    580 				if (use_topdown)
    581 					addr = ELF_TRUNC(addr, ph0->p_align);
    582 				else
    583 					addr = ELF_ROUND(addr, ph0->p_align);
    584 			} else {
    585 				u_long limit = round_page(last_ph->p_vaddr
    586 				    + last_ph->p_memsz);
    587 				u_long base = trunc_page(ph0->p_vaddr);
    588 
    589 				/*
     590 				 * If there is a gap between the psections,
    591 				 * map it as inaccessible so nothing else
    592 				 * mmap'ed will be placed there.
    593 				 */
    594 				if (limit != base) {
    595 					NEW_VMCMD2(vcset, vmcmd_map_zero,
    596 					    base - limit,
    597 					    limit - base_ph->p_vaddr, NULLVP,
    598 					    0, VM_PROT_NONE, VMCMD_RELATIVE);
    599 				}
    600 
    601 				addr = ph0->p_vaddr - base_ph->p_vaddr;
    602 				flags = VMCMD_RELATIVE;
    603 			}
    604 			last_ph = ph0;
    605 			elf_load_psection(vcset, vp, &ph[i], &addr,
    606 			    &size, &prot, flags);
    607 			/*
     608 			 * If the entry point falls within this psection, it
     609 			 * must contain the .text section.  *entryoff is
    610 			 * relative to the base psection.
    611 			 */
    612 			if (eh.e_entry >= ph0->p_vaddr &&
    613 			    eh.e_entry < (ph0->p_vaddr + size)) {
    614 				*entryoff = eh.e_entry - base_ph->p_vaddr;
    615 			}
    616 			addr += size;
    617 			break;
    618 		}
    619 
    620 		case PT_DYNAMIC:
    621 		case PT_PHDR:
    622 			break;
    623 
    624 		case PT_NOTE:
    625 			break;
    626 
    627 		default:
    628 			break;
    629 		}
    630 	}
    631 
    632 	kmem_free(ph, phsize);
    633 	/*
    634 	 * This value is ignored if TOPDOWN.
    635 	 */
    636 	*last = addr;
    637 	vrele(vp);
    638 	return 0;
    639 
    640 badunlock:
    641 	VOP_UNLOCK(vp);
    642 
    643 bad:
    644 	if (ph != NULL)
    645 		kmem_free(ph, phsize);
    646 #ifdef notyet /* XXX cgd 960926 */
    647 	(maybe) VOP_CLOSE it
    648 #endif
    649 	vrele(vp);
    650 	return error;
    651 }
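
/*
 * Note for callers: the interpreter's psections are emitted relative to the
 * first (VMCMD_BASE) vmcmd, and *entryoff is the entry point relative to
 * that base psection; exec_elf_makecmds() later adds the base command's
 * final address to *entryoff to compute the real ep_entry.
 */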
    652 
    653 /*
    654  * exec_elf_makecmds(): Prepare an Elf binary's exec package
    655  *
     656  * First, set up the various offsets/lengths in the exec package.
    657  *
    658  * Then, mark the text image busy (so it can be demand paged) or error
    659  * out if this is not possible.  Finally, set up vmcmds for the
    660  * text, data, bss, and stack segments.
    661  */
    662 int
    663 exec_elf_makecmds(struct lwp *l, struct exec_package *epp)
    664 {
    665 	Elf_Ehdr *eh = epp->ep_hdr;
    666 	Elf_Phdr *ph, *pp;
    667 	Elf_Addr phdr = 0, computed_phdr = 0, pos = 0, end_text = 0;
    668 	int error, i, nload;
    669 	char *interp = NULL;
    670 	u_long phsize;
    671 	struct proc *p;
    672 	struct elf_args *ap = NULL;
    673 	bool is_dyn;
    674 
    675 	if (epp->ep_hdrvalid < sizeof(Elf_Ehdr))
    676 		return ENOEXEC;
    677 
    678 	is_dyn = elf_check_header(eh, ET_DYN) == 0;
    679 	/*
    680 	 * XXX allow for executing shared objects. It seems silly
    681 	 * but other ELF-based systems allow it as well.
    682 	 */
    683 	if (elf_check_header(eh, ET_EXEC) != 0 && !is_dyn)
    684 		return ENOEXEC;
    685 
    686 	if (eh->e_phnum > MAXPHNUM || eh->e_phnum == 0)
    687 		return ENOEXEC;
    688 
    689 	error = vn_marktext(epp->ep_vp);
    690 	if (error)
    691 		return error;
    692 
    693 	/*
    694 	 * Allocate space to hold all the program headers, and read them
    695 	 * from the file
    696 	 */
    697 	p = l->l_proc;
    698 	phsize = eh->e_phnum * sizeof(Elf_Phdr);
    699 	ph = kmem_alloc(phsize, KM_SLEEP);
    700 
    701 	if ((error = exec_read_from(l, epp->ep_vp, eh->e_phoff, ph, phsize)) !=
    702 	    0)
    703 		goto bad;
    704 
    705 	epp->ep_taddr = epp->ep_tsize = ELFDEFNNAME(NO_ADDR);
    706 	epp->ep_daddr = epp->ep_dsize = ELFDEFNNAME(NO_ADDR);
    707 
    708 	for (i = 0; i < eh->e_phnum; i++) {
    709 		pp = &ph[i];
    710 		if (pp->p_type == PT_INTERP) {
    711 			if (pp->p_filesz >= MAXPATHLEN) {
    712 				error = ENOEXEC;
    713 				goto bad;
    714 			}
    715 			interp = PNBUF_GET();
    716 			interp[0] = '\0';
    717 			if ((error = exec_read_from(l, epp->ep_vp,
    718 			    pp->p_offset, interp, pp->p_filesz)) != 0)
    719 				goto bad;
    720 			break;
    721 		}
    722 	}
    723 
    724 	/*
    725 	 * On the same architecture, we may be emulating different systems.
    726 	 * See which one will accept this executable.
    727 	 *
    728 	 * Probe functions would normally see if the interpreter (if any)
    729 	 * exists. Emulation packages may possibly replace the interpreter in
    730 	 * interp[] with a changed path (/emul/xxx/<path>).
    731 	 */
    732 	pos = ELFDEFNNAME(NO_ADDR);
    733 	if (epp->ep_esch->u.elf_probe_func) {
    734 		vaddr_t startp = (vaddr_t)pos;
    735 
    736 		error = (*epp->ep_esch->u.elf_probe_func)(l, epp, eh, interp,
    737 							  &startp);
    738 		if (error)
    739 			goto bad;
    740 		pos = (Elf_Addr)startp;
    741 	}
    742 
    743 #if defined(PAX_MPROTECT) || defined(PAX_SEGVGUARD) || defined(PAX_ASLR)
    744 	p->p_pax = epp->ep_pax_flags;
    745 #endif /* PAX_MPROTECT || PAX_SEGVGUARD || PAX_ASLR */
    746 
    747 	if (is_dyn)
    748 		elf_placedynexec(l, epp, eh, ph);
    749 
    750 	/*
    751 	 * Load all the necessary sections
    752 	 */
    753 	for (i = nload = 0; i < eh->e_phnum; i++) {
    754 		Elf_Addr  addr = ELFDEFNNAME(NO_ADDR);
    755 		u_long size = 0;
    756 		int prot = 0;
    757 
    758 		pp = &ph[i];
    759 
    760 		switch (ph[i].p_type) {
    761 		case PT_LOAD:
    762 			elf_load_psection(&epp->ep_vmcmds, epp->ep_vp,
    763 			    &ph[i], &addr, &size, &prot, VMCMD_FIXED);
    764 
    765 			/*
     766 			 * Consider this a text segment if it is executable.
    767 			 * If there is more than one text segment, pick the
    768 			 * largest.
    769 			 */
    770 			if (ph[i].p_flags & PF_X) {
    771 				if (epp->ep_taddr == ELFDEFNNAME(NO_ADDR) ||
    772 				    size > epp->ep_tsize) {
    773 					epp->ep_taddr = addr;
    774 					epp->ep_tsize = size;
    775 				}
    776 				end_text = addr + size;
    777 			} else {
    778 				epp->ep_daddr = addr;
    779 				epp->ep_dsize = size;
    780 			}
    781 			if (ph[i].p_offset == 0) {
    782 				computed_phdr = ph[i].p_vaddr + eh->e_phoff;
    783 			}
    784 			break;
    785 
    786 		case PT_SHLIB:
    787 			/* SCO has these sections. */
    788 		case PT_INTERP:
    789 			/* Already did this one. */
    790 		case PT_DYNAMIC:
    791 			break;
    792 		case PT_NOTE:
    793 			break;
    794 		case PT_PHDR:
    795 			/* Note address of program headers (in text segment) */
    796 			phdr = pp->p_vaddr;
    797 			break;
    798 
    799 		default:
    800 			/*
    801 			 * Not fatal; we don't need to understand everything.
    802 			 */
    803 			break;
    804 		}
    805 	}
    806 	if (interp || (epp->ep_flags & EXEC_FORCEAUX) != 0) {
    807 		ap = kmem_alloc(sizeof(*ap), KM_SLEEP);
    808 		ap->arg_interp = (vaddr_t)NULL;
    809 	}
    810 
    811 	if (epp->ep_daddr == ELFDEFNNAME(NO_ADDR)) {
    812 		epp->ep_daddr = round_page(end_text);
    813 		epp->ep_dsize = 0;
    814 	}
    815 
    816 	/*
    817 	 * Check if we found a dynamically linked binary and arrange to load
    818 	 * its interpreter
    819 	 */
    820 	if (interp) {
    821 		int j = epp->ep_vmcmds.evs_used;
    822 		u_long interp_offset;
    823 
    824 		if ((error = elf_load_file(l, epp, interp,
    825 		    &epp->ep_vmcmds, &interp_offset, ap, &pos)) != 0) {
    826 			kmem_free(ap, sizeof(*ap));
    827 			goto bad;
    828 		}
    829 		ap->arg_interp = epp->ep_vmcmds.evs_cmds[j].ev_addr;
    830 		epp->ep_entry = ap->arg_interp + interp_offset;
    831 		PNBUF_PUT(interp);
    832 	} else
    833 		epp->ep_entry = eh->e_entry;
    834 
    835 	if (ap) {
    836 		ap->arg_phaddr = phdr ? phdr : computed_phdr;
    837 		ap->arg_phentsize = eh->e_phentsize;
    838 		ap->arg_phnum = eh->e_phnum;
    839 		ap->arg_entry = eh->e_entry;
    840 		epp->ep_emul_arg = ap;
    841 		epp->ep_emul_arg_free = elf_free_emul_arg;
    842 	}
    843 
    844 #ifdef ELF_MAP_PAGE_ZERO
    845 	/* Dell SVR4 maps page zero, yeuch! */
    846 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_readvn, PAGE_SIZE, 0,
    847 	    epp->ep_vp, 0, VM_PROT_READ);
    848 #endif
    849 	kmem_free(ph, phsize);
    850 	return (*epp->ep_esch->es_setup_stack)(l, epp);
    851 
    852 bad:
    853 	if (interp)
    854 		PNBUF_PUT(interp);
    855 	exec_free_emul_arg(epp);
    856 	kmem_free(ph, phsize);
    857 	kill_vmcmds(&epp->ep_vmcmds);
    858 	return error;
    859 }
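
/*
 * The AT_PHDR value handed to the dynamic linker above comes from an
 * explicit PT_PHDR entry when the binary carries one; otherwise it is
 * reconstructed from the PT_LOAD psection that maps file offset 0
 * (p_vaddr + e_phoff).
 */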
    860 
    861 int
    862 netbsd_elf_signature(struct lwp *l, struct exec_package *epp,
    863     Elf_Ehdr *eh)
    864 {
    865 	size_t i;
    866 	Elf_Phdr *ph;
    867 	size_t phsize;
    868 	char *nbuf;
    869 	int error;
    870 	int isnetbsd = 0;
    871 
    872 	epp->ep_pax_flags = 0;
    873 
    874 	if (eh->e_phnum > MAXPHNUM || eh->e_phnum == 0)
    875 		return ENOEXEC;
    876 
    877 	phsize = eh->e_phnum * sizeof(Elf_Phdr);
    878 	ph = kmem_alloc(phsize, KM_SLEEP);
    879 	error = exec_read_from(l, epp->ep_vp, eh->e_phoff, ph, phsize);
    880 	if (error)
    881 		goto out;
    882 
    883 	nbuf = kmem_alloc(MAXNOTESIZE, KM_SLEEP);
    884 	for (i = 0; i < eh->e_phnum; i++) {
    885 		const char *nptr;
    886 		size_t nlen;
    887 
    888 		if (ph[i].p_type != PT_NOTE ||
    889 		    ph[i].p_filesz > MAXNOTESIZE)
    890 			continue;
    891 
    892 		nlen = ph[i].p_filesz;
    893 		error = exec_read_from(l, epp->ep_vp, ph[i].p_offset,
    894 				       nbuf, nlen);
    895 		if (error)
    896 			continue;
    897 
    898 		nptr = nbuf;
    899 		while (nlen > 0) {
    900 			const Elf_Nhdr *np;
    901 			const char *ndata, *ndesc;
    902 
    903 			/* note header */
    904 			np = (const Elf_Nhdr *)nptr;
    905 			if (nlen < sizeof(*np)) {
    906 				break;
    907 			}
    908 			nptr += sizeof(*np);
    909 			nlen -= sizeof(*np);
    910 
    911 			/* note name */
    912 			ndata = nptr;
    913 			if (nlen < roundup(np->n_namesz, 4)) {
    914 				break;
    915 			}
    916 			nptr += roundup(np->n_namesz, 4);
    917 			nlen -= roundup(np->n_namesz, 4);
    918 
    919 			/* note description */
    920 			ndesc = nptr;
    921 			if (nlen < roundup(np->n_descsz, 4)) {
    922 				break;
    923 			}
    924 			nptr += roundup(np->n_descsz, 4);
    925 			nlen -= roundup(np->n_descsz, 4);
    926 
    927 			isnetbsd |= netbsd_elf_note(epp, np, ndata, ndesc);
    928 		}
    929 	}
    930 	kmem_free(nbuf, MAXNOTESIZE);
    931 
    932 	error = isnetbsd ? 0 : ENOEXEC;
    933 out:
    934 	kmem_free(ph, phsize);
    935 	return error;
    936 }
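
/*
 * Illustrative layout of one note record parsed above (all three header
 * fields are 32-bit words; name and desc are padded to 4-byte boundaries):
 *
 *	n_namesz  n_descsz  n_type
 *	name bytes  (e.g. "NetBSD\0" plus padding for the NetBSD tag)
 *	desc bytes  (e.g. a 4-byte kernel version number for that tag)
 */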
    937 
    938 int
    939 netbsd_elf_note(struct exec_package *epp,
    940 		const Elf_Nhdr *np, const char *ndata, const char *ndesc)
    941 {
    942 	int isnetbsd = 0;
    943 
    944 	switch (np->n_type) {
    945 	case ELF_NOTE_TYPE_NETBSD_TAG:
    946 		if (np->n_namesz != ELF_NOTE_NETBSD_NAMESZ ||
    947 		    np->n_descsz != ELF_NOTE_NETBSD_DESCSZ ||
    948 		    memcmp(ndata, ELF_NOTE_NETBSD_NAME,
    949 		    ELF_NOTE_NETBSD_NAMESZ))
    950 			goto bad;
    951 		isnetbsd = 1;
    952 		break;
    953 
    954 	case ELF_NOTE_TYPE_PAX_TAG:
    955 		if (np->n_namesz != ELF_NOTE_PAX_NAMESZ ||
    956 		    np->n_descsz != ELF_NOTE_PAX_DESCSZ ||
    957 		    memcmp(ndata, ELF_NOTE_PAX_NAME,
    958 		    ELF_NOTE_PAX_NAMESZ)) {
    959 bad:
    960 		    /*
    961 		     * Ignore GNU tags
    962 		     */
    963 		    if (np->n_namesz == ELF_NOTE_GNU_NAMESZ &&
    964 			memcmp(ndata, ELF_NOTE_GNU_NAME,
    965 			ELF_NOTE_GNU_NAMESZ) == 0)
    966 				break;
    967 #ifdef DIAGNOSTIC
    968 			printf("%s: bad tag %d: "
    969 			    "[%d %d, %d %d, %*.*s %*.*s]\n",
    970 			    epp->ep_kname,
    971 			    np->n_type,
    972 			    np->n_namesz, ELF_NOTE_PAX_NAMESZ,
    973 			    np->n_descsz, ELF_NOTE_PAX_DESCSZ,
    974 			    ELF_NOTE_PAX_NAMESZ,
    975 			    ELF_NOTE_PAX_NAMESZ,
    976 			    ndata,
    977 			    ELF_NOTE_PAX_NAMESZ,
    978 			    ELF_NOTE_PAX_NAMESZ,
    979 			    ELF_NOTE_PAX_NAME);
    980 #endif
    981 			break;
    982 		}
    983 		(void)memcpy(&epp->ep_pax_flags,
    984 		    ndata + ELF_NOTE_PAX_NAMESZ,
    985 		    sizeof(epp->ep_pax_flags));
    986 		break;
    987 
    988 	case ELF_NOTE_TYPE_SUSE_TAG:
    989 		break;
    990 
    991 	default:
    992 #ifdef DIAGNOSTIC
    993 		printf("%s: unknown note type %d\n", epp->ep_kname,
    994 		    np->n_type);
    995 #endif
    996 		break;
    997 	}
    998 
    999 	return isnetbsd;
   1000 }
   1001 
   1002 int
   1003 netbsd_elf_probe(struct lwp *l, struct exec_package *epp, void *eh, char *itp,
   1004     vaddr_t *pos)
   1005 {
   1006 	int error;
   1007 
   1008 	if ((error = netbsd_elf_signature(l, epp, eh)) != 0)
   1009 		return error;
   1010 #ifdef ELF_MD_PROBE_FUNC
   1011 	if ((error = ELF_MD_PROBE_FUNC(l, epp, eh, itp, pos)) != 0)
   1012 		return error;
   1013 #elif defined(ELF_INTERP_NON_RELOCATABLE)
   1014 	*pos = ELF_LINK_ADDR;
   1015 #endif
   1016 	epp->ep_flags |= EXEC_FORCEAUX;
   1017 	return 0;
   1018 }
   1019 
   1020 void
   1021 elf_free_emul_arg(void *arg)
   1022 {
   1023 	struct elf_args *ap = arg;
   1024 	KASSERT(ap != NULL);
   1025 	kmem_free(ap, sizeof(*ap));
   1026 }
   1027