Home | History | Annotate | Line # | Download | only in kern
kern_ktrace.c revision 1.91
      1 /*	$NetBSD: kern_ktrace.c,v 1.91 2004/06/24 15:06:35 christos Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1989, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  *
     31  *	@(#)kern_ktrace.c	8.5 (Berkeley) 5/14/95
     32  */
     33 
     34 #include <sys/cdefs.h>
     35 __KERNEL_RCSID(0, "$NetBSD: kern_ktrace.c,v 1.91 2004/06/24 15:06:35 christos Exp $");
     36 
     37 #include "opt_ktrace.h"
     38 #include "opt_compat_mach.h"
     39 
     40 #include <sys/param.h>
     41 #include <sys/systm.h>
     42 #include <sys/proc.h>
     43 #include <sys/file.h>
     44 #include <sys/namei.h>
     45 #include <sys/vnode.h>
     46 #include <sys/ktrace.h>
     47 #include <sys/malloc.h>
     48 #include <sys/syslog.h>
     49 #include <sys/filedesc.h>
     50 #include <sys/ioctl.h>
     51 
     52 #include <sys/mount.h>
     53 #include <sys/sa.h>
     54 #include <sys/syscallargs.h>
     55 
     56 #ifdef KTRACE
     57 
/*
 * Prototypes for routines defined later in this file.
 */
void	ktrinitheader(struct ktr_header *kth, struct proc *p, int type);
int	ktrwrite(struct proc *p, struct ktr_header *kth);
int	ktrace_common(struct proc *curp, int ops, int facs, int pid,
	    struct file *fp);
int	ktrops(struct proc *curp, struct proc *p, int ops, int facs,
	    struct file *fp);
int	ktrsetchildren(struct proc *curp, struct proc *top, int ops, int facs,
	    struct file *fp);
int	ktrcanset(struct proc *callp, struct proc *targetp);
int	ktrsamefile(struct file *f1, struct file *f2);
     66 
     67 /*
     68  * "deep" compare of two files for the purposes of clearing a trace.
     69  * Returns true if they're the same open file, or if they point at the
     70  * same underlying vnode/socket.
     71  */
     72 
     73 int
     74 ktrsamefile(struct file *f1, struct file *f2)
     75 {
     76 
     77 	return ((f1 == f2) ||
     78 	    ((f1 != NULL) && (f2 != NULL) &&
     79 		(f1->f_type == f2->f_type) &&
     80 		(f1->f_data == f2->f_data)));
     81 }
     82 
     83 void
     84 ktrderef(struct proc *p)
     85 {
     86 	struct file *fp = p->p_tracep;
     87 	p->p_traceflag = 0;
     88 	if (fp == NULL)
     89 		return;
     90 	p->p_tracep = NULL;
     91 
     92 	simple_lock(&fp->f_slock);
     93 	FILE_USE(fp);
     94 
     95 	/*
     96 	 * ktrace file descriptor can't be watched (are not visible to
     97 	 * userspace), so no kqueue stuff here.
     98 	 * XXX: The above comment is wrong, because the fktrace file
     99 	 * descriptor is available in userland.
    100 	 */
    101 	closef(fp, NULL);
    102 }
    103 
    104 void
    105 ktradref(struct proc *p)
    106 {
    107 	struct file *fp = p->p_tracep;
    108 
    109 	fp->f_count++;
    110 }
    111 
    112 void
    113 ktrinitheader(struct ktr_header *kth, struct proc *p, int type)
    114 {
    115 
    116 	(void)memset(kth, 0, sizeof(*kth));
    117 	kth->ktr_type = type;
    118 	microtime(&kth->ktr_time);
    119 	kth->ktr_pid = p->p_pid;
    120 	memcpy(kth->ktr_comm, p->p_comm, MAXCOMLEN);
    121 }
    122 
    123 int
    124 ktrsyscall(struct proc *p, register_t code, register_t realcode,
    125     const struct sysent *callp, register_t args[])
    126 {
    127 	struct ktr_header kth;
    128 	struct ktr_syscall *ktp;
    129 	register_t *argp;
    130 	int argsize, error;
    131 	size_t len;
    132 	u_int i;
    133 
    134 	if (callp == NULL)
    135 		callp = p->p_emul->e_sysent;
    136 
    137 	argsize = callp[code].sy_argsize;
    138 #ifdef _LP64
    139 	if (p->p_flag & P_32)
    140 		argsize = argsize << 1;
    141 #endif
    142 	len = sizeof(struct ktr_syscall) + argsize;
    143 
    144 	p->p_traceflag |= KTRFAC_ACTIVE;
    145 	ktrinitheader(&kth, p, KTR_SYSCALL);
    146 	ktp = malloc(len, M_TEMP, M_WAITOK);
    147 	ktp->ktr_code = realcode;
    148 	ktp->ktr_argsize = argsize;
    149 	argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall));
    150 	for (i = 0; i < (argsize / sizeof(*argp)); i++)
    151 		*argp++ = args[i];
    152 	kth.ktr_buf = (caddr_t)ktp;
    153 	kth.ktr_len = len;
    154 	error = ktrwrite(p, &kth);
    155 	free(ktp, M_TEMP);
    156 	p->p_traceflag &= ~KTRFAC_ACTIVE;
    157 	return error;
    158 }
    159 
    160 int
    161 ktrsysret(struct proc *p, register_t code, int error, register_t *retval)
    162 {
    163 	struct ktr_header kth;
    164 	struct ktr_sysret ktp;
    165 
    166 	p->p_traceflag |= KTRFAC_ACTIVE;
    167 	ktrinitheader(&kth, p, KTR_SYSRET);
    168 	ktp.ktr_code = code;
    169 	ktp.ktr_eosys = 0;			/* XXX unused */
    170 	ktp.ktr_error = error;
    171 	ktp.ktr_retval = retval ? retval[0] : 0;
    172 	ktp.ktr_retval_1 = retval ? retval[1] : 0;
    173 
    174 	kth.ktr_buf = (caddr_t)&ktp;
    175 	kth.ktr_len = sizeof(struct ktr_sysret);
    176 
    177 	error = ktrwrite(p, &kth);
    178 	p->p_traceflag &= ~KTRFAC_ACTIVE;
    179 	return error;
    180 }
    181 
    182 int
    183 ktrnamei(struct proc *p, char *path)
    184 {
    185 	struct ktr_header kth;
    186 	int error;
    187 
    188 	p->p_traceflag |= KTRFAC_ACTIVE;
    189 	ktrinitheader(&kth, p, KTR_NAMEI);
    190 	kth.ktr_len = strlen(path);
    191 	kth.ktr_buf = path;
    192 
    193 	error = ktrwrite(p, &kth);
    194 	p->p_traceflag &= ~KTRFAC_ACTIVE;
    195 	return error;
    196 }
    197 
    198 int
    199 ktremul(struct proc *p)
    200 {
    201 	struct ktr_header kth;
    202 	const char *emul = p->p_emul->e_name;
    203 	int error;
    204 
    205 	p->p_traceflag |= KTRFAC_ACTIVE;
    206 	ktrinitheader(&kth, p, KTR_EMUL);
    207 	kth.ktr_len = strlen(emul);
    208 	kth.ktr_buf = (caddr_t)emul;
    209 
    210 	error = ktrwrite(p, &kth);
    211 	p->p_traceflag &= ~KTRFAC_ACTIVE;
    212 	return error;
    213 }
    214 
    215 int
    216 ktrkmem(struct proc *p, int ktr, const void *buf, size_t len)
    217 {
    218 	struct ktr_header kth;
    219 	int error;
    220 
    221 	p->p_traceflag |= KTRFAC_ACTIVE;
    222 	ktrinitheader(&kth, p, ktr);
    223 	kth.ktr_len = len;
    224 	kth.ktr_buf = buf;
    225 
    226 	error = ktrwrite(p, &kth);
    227 	p->p_traceflag &= ~KTRFAC_ACTIVE;
    228 	return error;
    229 }
    230 
    231 int
    232 ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov,
    233     int len, int error)
    234 {
    235 	struct ktr_header kth;
    236 	struct ktr_genio *ktp;
    237 	caddr_t cp;
    238 	int resid = len, cnt;
    239 	int buflen;
    240 
    241 	if (error)
    242 		return error;
    243 
    244 	p->p_traceflag |= KTRFAC_ACTIVE;
    245 
    246 	buflen = min(PAGE_SIZE, len + sizeof(struct ktr_genio));
    247 
    248 	ktrinitheader(&kth, p, KTR_GENIO);
    249 	ktp = malloc(buflen, M_TEMP, M_WAITOK);
    250 	ktp->ktr_fd = fd;
    251 	ktp->ktr_rw = rw;
    252 
    253 	kth.ktr_buf = (caddr_t)ktp;
    254 
    255 	cp = (caddr_t)((char *)ktp + sizeof(struct ktr_genio));
    256 	buflen -= sizeof(struct ktr_genio);
    257 
    258 	while (resid > 0) {
    259 #if 0 /* XXX NJWLWP */
    260 		KDASSERT(p->p_cpu != NULL);
    261 		KDASSERT(p->p_cpu == curcpu());
    262 #endif
    263 		/* XXX NJWLWP */
    264 		if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
    265 			preempt(1);
    266 
    267 		cnt = min(iov->iov_len, buflen);
    268 		if (cnt > resid)
    269 			cnt = resid;
    270 		if ((error = copyin(iov->iov_base, cp, cnt)) != 0)
    271 			break;
    272 
    273 		kth.ktr_len = cnt + sizeof(struct ktr_genio);
    274 
    275 		error = ktrwrite(p, &kth);
    276 		if (__predict_false(error != 0))
    277 			break;
    278 
    279 		iov->iov_base = (caddr_t)iov->iov_base + cnt;
    280 		iov->iov_len -= cnt;
    281 
    282 		if (iov->iov_len == 0)
    283 			iov++;
    284 
    285 		resid -= cnt;
    286 	}
    287 
    288 	free(ktp, M_TEMP);
    289 	p->p_traceflag &= ~KTRFAC_ACTIVE;
    290 	return error;
    291 }
    292 
    293 int
    294 ktrpsig(struct proc *p, int sig, sig_t action, const sigset_t *mask,
    295     const ksiginfo_t *ksi)
    296 {
    297 	int error;
    298 
    299 	struct ktr_header kth;
    300 	struct {
    301 		struct ktr_psig	kp;
    302 		siginfo_t	si;
    303 	} kbuf;
    304 
    305 	p->p_traceflag |= KTRFAC_ACTIVE;
    306 	ktrinitheader(&kth, p, KTR_PSIG);
    307 	kbuf.kp.signo = (char)sig;
    308 	kbuf.kp.action = action;
    309 	kbuf.kp.mask = *mask;
    310 	kth.ktr_buf = (caddr_t)&kbuf;
    311 	if (ksi) {
    312 		kbuf.kp.code = KSI_TRAPCODE(ksi);
    313 		(void)memset(&kbuf.si, 0, sizeof(kbuf.si));
    314 		kbuf.si._info = ksi->ksi_info;
    315 		kth.ktr_len = sizeof(kbuf);
    316 	} else {
    317 		kbuf.kp.code = 0;
    318 		kth.ktr_len = sizeof(struct ktr_psig);
    319 	}
    320 	error = ktrwrite(p, &kth);
    321 	p->p_traceflag &= ~KTRFAC_ACTIVE;
    322 	return error;
    323 }
    324 
    325 int
    326 ktrcsw(struct proc *p, int out, int user)
    327 {
    328 	struct ktr_header kth;
    329 	struct ktr_csw kc;
    330 	int error;
    331 
    332 	p->p_traceflag |= KTRFAC_ACTIVE;
    333 	ktrinitheader(&kth, p, KTR_CSW);
    334 	kc.out = out;
    335 	kc.user = user;
    336 	kth.ktr_buf = (caddr_t)&kc;
    337 	kth.ktr_len = sizeof(struct ktr_csw);
    338 
    339 	error = ktrwrite(p, &kth);
    340 	p->p_traceflag &= ~KTRFAC_ACTIVE;
    341 	return error;
    342 }
    343 
    344 int
    345 ktruser(struct proc *p, const char *id, void *addr, size_t len, int ustr)
    346 {
    347 	struct ktr_header kth;
    348 	struct ktr_user *ktp;
    349 	caddr_t user_dta;
    350 	int error;
    351 
    352 	p->p_traceflag |= KTRFAC_ACTIVE;
    353 	ktrinitheader(&kth, p, KTR_USER);
    354 	ktp = malloc(sizeof(struct ktr_user) + len, M_TEMP, M_WAITOK);
    355 	if (ustr) {
    356 		if (copyinstr(id, ktp->ktr_id, KTR_USER_MAXIDLEN, NULL) != 0)
    357 			ktp->ktr_id[0] = '\0';
    358 	} else
    359 		strncpy(ktp->ktr_id, id, KTR_USER_MAXIDLEN);
    360 	ktp->ktr_id[KTR_USER_MAXIDLEN-1] = '\0';
    361 
    362 	user_dta = (caddr_t) ((char *)ktp + sizeof(struct ktr_user));
    363 	if (copyin(addr, (void *) user_dta, len) != 0)
    364 		len = 0;
    365 
    366 	kth.ktr_buf = (void *)ktp;
    367 	kth.ktr_len = sizeof(struct ktr_user) + len;
    368 	error = ktrwrite(p, &kth);
    369 
    370 	free(ktp, M_TEMP);
    371 	p->p_traceflag &= ~KTRFAC_ACTIVE;
    372 	return error;
    373 
    374 }
    375 
    376 int
    377 ktrmmsg(struct proc *p, const void *msgh, size_t size)
    378 {
    379 	struct ktr_header kth;
    380 	struct ktr_mmsg	*kp;
    381 	int error;
    382 
    383 	p->p_traceflag |= KTRFAC_ACTIVE;
    384 	ktrinitheader(&kth, p, KTR_MMSG);
    385 
    386 	kp = (struct ktr_mmsg *)msgh;
    387 	kth.ktr_buf = (caddr_t)kp;
    388 	kth.ktr_len = size;
    389 	error = ktrwrite(p, &kth);
    390 	p->p_traceflag &= ~KTRFAC_ACTIVE;
    391 	return error;
    392 }
    393 
    394 int
    395 ktrmool(struct proc *p, const void *kaddr, size_t size, const void *uaddr)
    396 {
    397 	struct ktr_header kth;
    398 	struct ktr_mool *kp;
    399 	struct ktr_mool *buf;
    400 	int error;
    401 
    402 	p->p_traceflag |= KTRFAC_ACTIVE;
    403 	ktrinitheader(&kth, p, KTR_MOOL);
    404 
    405 	kp = malloc(size + sizeof(*kp), M_TEMP, M_WAITOK);
    406 	kp->uaddr = uaddr;
    407 	kp->size = size;
    408 	buf = kp + 1; /* Skip uaddr and size */
    409 	(void)memcpy(buf, kaddr, size);
    410 
    411 	kth.ktr_buf = (caddr_t)kp;
    412 	kth.ktr_len = size + sizeof(*kp);
    413 	error = ktrwrite(p, &kth);
    414 	free(kp, M_TEMP);
    415 
    416 	p->p_traceflag &= ~KTRFAC_ACTIVE;
    417 	return error;
    418 }
    419 
    420 
    421 /* Interface and common routines */
    422 
    423 int
    424 ktrace_common(struct proc *curp, int ops, int facs, int pid, struct file *fp)
    425 {
    426 	int ret = 0;
    427 	int error = 0;
    428 	int one = 1;
    429 	int descend;
    430 	struct proc *p;
    431 	struct pgrp *pg;
    432 
    433 	curp->p_traceflag |= KTRFAC_ACTIVE;
    434 	descend = ops & KTRFLAG_DESCEND;
    435 	facs = facs & ~((unsigned) KTRFAC_ROOT);
    436 
    437 	/*
    438 	 * Clear all uses of the tracefile
    439 	 */
    440 	if (KTROP(ops) == KTROP_CLEARFILE) {
    441 		proclist_lock_read();
    442 		LIST_FOREACH(p, &allproc, p_list) {
    443 			if (ktrsamefile(p->p_tracep, fp)) {
    444 				if (ktrcanset(curp, p))
    445 					ktrderef(p);
    446 				else
    447 					error = EPERM;
    448 			}
    449 		}
    450 		proclist_unlock_read();
    451 		goto done;
    452 	}
    453 
    454 	/*
    455 	 * Mark fp non-blocking, to avoid problems from possible deadlocks.
    456 	 */
    457 
    458 	if (fp != NULL) {
    459 		fp->f_flag |= FNONBLOCK;
    460 		(*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&one, curp);
    461 	}
    462 
    463 	/*
    464 	 * need something to (un)trace (XXX - why is this here?)
    465 	 */
    466 	if (!facs) {
    467 		error = EINVAL;
    468 		goto done;
    469 	}
    470 	/*
    471 	 * do it
    472 	 */
    473 	if (pid < 0) {
    474 		/*
    475 		 * by process group
    476 		 */
    477 		pg = pg_find(-pid, PFIND_UNLOCK_FAIL);
    478 		if (pg == NULL) {
    479 			error = ESRCH;
    480 			goto done;
    481 		}
    482 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
    483 			if (descend)
    484 				ret |= ktrsetchildren(curp, p, ops, facs, fp);
    485 			else
    486 				ret |= ktrops(curp, p, ops, facs, fp);
    487 		}
    488 
    489 	} else {
    490 		/*
    491 		 * by pid
    492 		 */
    493 		p = p_find(pid, PFIND_UNLOCK_FAIL);
    494 		if (p == NULL) {
    495 			error = ESRCH;
    496 			goto done;
    497 		}
    498 		if (descend)
    499 			ret |= ktrsetchildren(curp, p, ops, facs, fp);
    500 		else
    501 			ret |= ktrops(curp, p, ops, facs, fp);
    502 	}
    503 	proclist_unlock_read();	/* taken by p{g}_find */
    504 	if (!ret)
    505 		error = EPERM;
    506 done:
    507 	curp->p_traceflag &= ~KTRFAC_ACTIVE;
    508 	return (error);
    509 }
    510 
    511 /*
    512  * ktrace system call
    513  */
    514 /* ARGSUSED */
    515 int
    516 sys_fktrace(struct lwp *l, void *v, register_t *retval)
    517 {
    518 	struct sys_fktrace_args /* {
    519 		syscallarg(int) fd;
    520 		syscallarg(int) ops;
    521 		syscallarg(int) facs;
    522 		syscallarg(int) pid;
    523 	} */ *uap = v;
    524 	struct proc *curp = l->l_proc;
    525 	struct file *fp = NULL;
    526 	struct filedesc *fdp = curp->p_fd;
    527 	int error;
    528 
    529 	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
    530 		return (EBADF);
    531 
    532 	FILE_USE(fp);
    533 
    534 	if ((fp->f_flag & FWRITE) == 0)
    535 		error = EBADF;
    536 	else
    537 		error = ktrace_common(curp, SCARG(uap, ops),
    538 		    SCARG(uap, facs), SCARG(uap, pid), fp);
    539 
    540 	FILE_UNUSE(fp, curp);
    541 
    542 	return error;
    543 }
    544 
    545 /*
    546  * ktrace system call
    547  */
    548 /* ARGSUSED */
    549 int
    550 sys_ktrace(struct lwp *l, void *v, register_t *retval)
    551 {
    552 	struct sys_ktrace_args /* {
    553 		syscallarg(const char *) fname;
    554 		syscallarg(int) ops;
    555 		syscallarg(int) facs;
    556 		syscallarg(int) pid;
    557 	} */ *uap = v;
    558 	struct proc *curp = l->l_proc;
    559 	struct vnode *vp = NULL;
    560 	struct file *fp = NULL;
    561 	int fd;
    562 	int ops = SCARG(uap, ops);
    563 	int error = 0;
    564 	struct nameidata nd;
    565 
    566 	ops = KTROP(ops) | (ops & KTRFLAG_DESCEND);
    567 
    568 	curp->p_traceflag |= KTRFAC_ACTIVE;
    569 	if ((ops & KTROP_CLEAR) == 0) {
    570 		/*
    571 		 * an operation which requires a file argument.
    572 		 */
    573 		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname),
    574 		    curp);
    575 		if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
    576 			curp->p_traceflag &= ~KTRFAC_ACTIVE;
    577 			return (error);
    578 		}
    579 		vp = nd.ni_vp;
    580 		VOP_UNLOCK(vp, 0);
    581 		if (vp->v_type != VREG) {
    582 			(void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp);
    583 			curp->p_traceflag &= ~KTRFAC_ACTIVE;
    584 			return (EACCES);
    585 		}
    586 		/*
    587 		 * XXX This uses up a file descriptor slot in the
    588 		 * tracing process for the duration of this syscall.
    589 		 * This is not expected to be a problem.  If
    590 		 * falloc(NULL, ...) DTRT we could skip that part, but
    591 		 * that would require changing its interface to allow
    592 		 * the caller to pass in a ucred..
    593 		 *
    594 		 * This will FILE_USE the fp it returns, if any.
    595 		 * Keep it in use until we return.
    596 		 */
    597 		if ((error = falloc(curp, &fp, &fd)) != 0)
    598 			goto done;
    599 
    600 		fp->f_flag = FWRITE|FAPPEND;
    601 		fp->f_type = DTYPE_VNODE;
    602 		fp->f_ops = &vnops;
    603 		fp->f_data = (caddr_t)vp;
    604 		FILE_SET_MATURE(fp);
    605 		vp = NULL;
    606 	}
    607 	error = ktrace_common(curp, SCARG(uap, ops), SCARG(uap, facs),
    608 	    SCARG(uap, pid), fp);
    609 done:
    610 	if (vp != NULL)
    611 		(void) vn_close(vp, FWRITE, curp->p_ucred, curp);
    612 	if (fp != NULL) {
    613 		FILE_UNUSE(fp, curp);	/* release file */
    614 		fdrelease(curp, fd); 	/* release fd table slot */
    615 	}
    616 	return (error);
    617 }
    618 
    619 int
    620 ktrops(struct proc *curp, struct proc *p, int ops, int facs,
    621     struct file *fp)
    622 {
    623 
    624 	if (!ktrcanset(curp, p))
    625 		return (0);
    626 	if (KTROP(ops) == KTROP_SET) {
    627 		if (p->p_tracep != fp) {
    628 			/*
    629 			 * if trace file already in use, relinquish
    630 			 */
    631 			ktrderef(p);
    632 			p->p_tracep = fp;
    633 			ktradref(p);
    634 		}
    635 		p->p_traceflag |= facs;
    636 		if (curp->p_ucred->cr_uid == 0)
    637 			p->p_traceflag |= KTRFAC_ROOT;
    638 	} else {
    639 		/* KTROP_CLEAR */
    640 		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
    641 			/* no more tracing */
    642 			ktrderef(p);
    643 		}
    644 	}
    645 
    646 	/*
    647 	 * Emit an emulation record, every time there is a ktrace
    648 	 * change/attach request.
    649 	 */
    650 	if (KTRPOINT(p, KTR_EMUL))
    651 		p->p_traceflag |= KTRFAC_TRC_EMUL;
    652 #ifdef __HAVE_SYSCALL_INTERN
    653 	(*p->p_emul->e_syscall_intern)(p);
    654 #endif
    655 
    656 	return (1);
    657 }
    658 
    659 int
    660 ktrsetchildren(struct proc *curp, struct proc *top, int ops, int facs,
    661     struct file *fp)
    662 {
    663 	struct proc *p;
    664 	int ret = 0;
    665 
    666 	p = top;
    667 	for (;;) {
    668 		ret |= ktrops(curp, p, ops, facs, fp);
    669 		/*
    670 		 * If this process has children, descend to them next,
    671 		 * otherwise do any siblings, and if done with this level,
    672 		 * follow back up the tree (but not past top).
    673 		 */
    674 		if (LIST_FIRST(&p->p_children) != NULL) {
    675 			p = LIST_FIRST(&p->p_children);
    676 			continue;
    677 		}
    678 		for (;;) {
    679 			if (p == top)
    680 				return (ret);
    681 			if (LIST_NEXT(p, p_sibling) != NULL) {
    682 				p = LIST_NEXT(p, p_sibling);
    683 				break;
    684 			}
    685 			p = p->p_pptr;
    686 		}
    687 	}
    688 	/*NOTREACHED*/
    689 }
    690 
    691 int
    692 ktrwrite(struct proc *p, struct ktr_header *kth)
    693 {
    694 	struct uio auio;
    695 	struct iovec aiov[2];
    696 	int error, tries;
    697 	struct file *fp = p->p_tracep;
    698 
    699 	if (fp == NULL)
    700 		return 0;
    701 
    702 	if (p->p_traceflag & KTRFAC_TRC_EMUL) {
    703 		/* Add emulation trace before first entry for this process */
    704 		p->p_traceflag &= ~KTRFAC_TRC_EMUL;
    705 		if ((error = ktremul(p)) != 0)
    706 			return error;
    707 	}
    708 
    709 	auio.uio_iov = &aiov[0];
    710 	auio.uio_offset = 0;
    711 	auio.uio_segflg = UIO_SYSSPACE;
    712 	auio.uio_rw = UIO_WRITE;
    713 	aiov[0].iov_base = (caddr_t)kth;
    714 	aiov[0].iov_len = sizeof(struct ktr_header);
    715 	auio.uio_resid = sizeof(struct ktr_header);
    716 	auio.uio_iovcnt = 1;
    717 	auio.uio_procp = (struct proc *)0;
    718 	if (kth->ktr_len > 0) {
    719 		auio.uio_iovcnt++;
    720 		aiov[1].iov_base = (void *)kth->ktr_buf;
    721 		aiov[1].iov_len = kth->ktr_len;
    722 		auio.uio_resid += kth->ktr_len;
    723 	}
    724 
    725 	simple_lock(&fp->f_slock);
    726 	FILE_USE(fp);
    727 
    728 	tries = 0;
    729 	do {
    730 		error = (*fp->f_ops->fo_write)(fp, &fp->f_offset, &auio,
    731 		    fp->f_cred, FOF_UPDATE_OFFSET);
    732 		tries++;
    733 		if (error == EWOULDBLOCK)
    734 			preempt(1);
    735 	} while ((error == EWOULDBLOCK) && (tries < 3));
    736 	FILE_UNUSE(fp, NULL);
    737 
    738 	if (__predict_true(error == 0))
    739 		return (0);
    740 	/*
    741 	 * If error encountered, give up tracing on this vnode.  Don't report
    742 	 * EPIPE as this can easily happen with fktrace()/ktruss.
    743 	 */
    744 	if (error != EPIPE)
    745 		log(LOG_NOTICE,
    746 		    "ktrace write failed, errno %d, tracing stopped\n",
    747 		    error);
    748 	proclist_lock_read();
    749 	LIST_FOREACH(p, &allproc, p_list) {
    750 		if (ktrsamefile(p->p_tracep, fp))
    751 			ktrderef(p);
    752 	}
    753 	proclist_unlock_read();
    754 
    755 	return (error);
    756 }
    757 
    758 /*
    759  * Return true if caller has permission to set the ktracing state
    760  * of target.  Essentially, the target can't possess any
    761  * more permissions than the caller.  KTRFAC_ROOT signifies that
    762  * root previously set the tracing status on the target process, and
    763  * so, only root may further change it.
    764  *
    765  * TODO: check groups.  use caller effective gid.
    766  */
    767 int
    768 ktrcanset(struct proc *callp, struct proc *targetp)
    769 {
    770 	struct pcred *caller = callp->p_cred;
    771 	struct pcred *target = targetp->p_cred;
    772 
    773 	if ((caller->pc_ucred->cr_uid == target->p_ruid &&
    774 	    target->p_ruid == target->p_svuid &&
    775 	    caller->p_rgid == target->p_rgid &&	/* XXX */
    776 	    target->p_rgid == target->p_svgid &&
    777 	    (targetp->p_traceflag & KTRFAC_ROOT) == 0 &&
    778 	    (targetp->p_flag & P_SUGID) == 0) ||
    779 	    caller->pc_ucred->cr_uid == 0)
    780 		return (1);
    781 
    782 	return (0);
    783 }
    784 #endif /* KTRACE */
    785 
    786 /*
    787  * Put user defined entry to ktrace records.
    788  */
    789 int
    790 sys_utrace(struct lwp *l, void *v, register_t *retval)
    791 {
    792 #ifdef KTRACE
    793 	struct sys_utrace_args /* {
    794 		syscallarg(const char *) label;
    795 		syscallarg(void *) addr;
    796 		syscallarg(size_t) len;
    797 	} */ *uap = v;
    798 	struct proc *p = l->l_proc;
    799 
    800 	if (!KTRPOINT(p, KTR_USER))
    801 		return (0);
    802 
    803 	if (SCARG(uap, len) > KTR_USER_MAXLEN)
    804 		return (EINVAL);
    805 
    806 	ktruser(p, SCARG(uap, label), SCARG(uap, addr), SCARG(uap, len), 1);
    807 
    808 	return (0);
    809 #else /* !KTRACE */
    810 	return ENOSYS;
    811 #endif /* KTRACE */
    812 }
    813