/*	$NetBSD: kern_ktrace.c,v 1.44 2000/05/29 22:04:11 sommerfeld Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.5 (Berkeley) 5/14/95
 */
37
38 #include "opt_ktrace.h"
39
40 #ifdef KTRACE
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/proc.h>
45 #include <sys/file.h>
46 #include <sys/namei.h>
47 #include <sys/vnode.h>
48 #include <sys/ktrace.h>
49 #include <sys/malloc.h>
50 #include <sys/syslog.h>
51 #include <sys/filedesc.h>
52 #include <sys/ioctl.h>
53
54 #include <sys/mount.h>
55 #include <sys/syscallargs.h>
56
57 int ktrace_common __P((struct proc *, int, int, int, struct file *));
58 void ktrinitheader __P((struct ktr_header *, struct proc *, int));
59 int ktrops __P((struct proc *, struct proc *, int, int, struct file *));
60 int ktrsetchildren __P((struct proc *, struct proc *, int, int,
61 struct file *));
62 int ktrwrite __P((struct proc *, struct ktr_header *));
63 int ktrcanset __P((struct proc *, struct proc *));
64 int ktrsamefile __P((struct file *, struct file *));
65
66 /*
67 * "deep" compare of two files for the purposes of clearing a trace.
68 * Returns true if they're the same open file, or if they point at the
69 * same underlying vnode/socket.
70 */
71
72 int
73 ktrsamefile (f1, f2)
74 struct file *f1, *f2;
75 {
76 return ((f1 == f2) ||
77 ((f1->f_type == f2->f_type) &&
78 (f1->f_data == f2->f_data)));
79 }
80
81 void
82 ktrderef(p)
83 struct proc *p;
84 {
85 struct file *fp = p->p_tracep;
86 p->p_traceflag = 0;
87 if (fp == NULL)
88 return;
89 FILE_USE(fp);
90 closef(fp, NULL);
91
92 p->p_tracep = NULL;
93 }
94
95 void
96 ktradref(p)
97 struct proc *p;
98 {
99 struct file *fp = p->p_tracep;
100
101 fp->f_count++;
102 }
103
104 void
105 ktrinitheader(kth, p, type)
106 struct ktr_header *kth;
107 struct proc *p;
108 int type;
109 {
110
111 memset(kth, 0, sizeof(*kth));
112 kth->ktr_type = type;
113 microtime(&kth->ktr_time);
114 kth->ktr_pid = p->p_pid;
115 memcpy(kth->ktr_comm, p->p_comm, MAXCOMLEN);
116 }
117
118 void
119 ktrsyscall(p, code, argsize, args)
120 struct proc *p;
121 register_t code;
122 size_t argsize;
123 register_t args[];
124 {
125 struct ktr_header kth;
126 struct ktr_syscall *ktp;
127 register_t *argp;
128 size_t len = sizeof(struct ktr_syscall) + argsize;
129 int i;
130
131 p->p_traceflag |= KTRFAC_ACTIVE;
132 ktrinitheader(&kth, p, KTR_SYSCALL);
133 ktp = malloc(len, M_TEMP, M_WAITOK);
134 ktp->ktr_code = code;
135 ktp->ktr_argsize = argsize;
136 argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall));
137 for (i = 0; i < (argsize / sizeof(*argp)); i++)
138 *argp++ = args[i];
139 kth.ktr_buf = (caddr_t)ktp;
140 kth.ktr_len = len;
141 (void) ktrwrite(p, &kth);
142 free(ktp, M_TEMP);
143 p->p_traceflag &= ~KTRFAC_ACTIVE;
144 }
145
146 void
147 ktrsysret(p, code, error, retval)
148 struct proc *p;
149 register_t code;
150 int error;
151 register_t retval;
152 {
153 struct ktr_header kth;
154 struct ktr_sysret ktp;
155
156 p->p_traceflag |= KTRFAC_ACTIVE;
157 ktrinitheader(&kth, p, KTR_SYSRET);
158 ktp.ktr_code = code;
159 ktp.ktr_eosys = 0; /* XXX unused */
160 ktp.ktr_error = error;
161 ktp.ktr_retval = retval; /* what about val2 ? */
162
163 kth.ktr_buf = (caddr_t)&ktp;
164 kth.ktr_len = sizeof(struct ktr_sysret);
165
166 (void) ktrwrite(p, &kth);
167 p->p_traceflag &= ~KTRFAC_ACTIVE;
168 }
169
170 void
171 ktrnamei(p, path)
172 struct proc *p;
173 char *path;
174 {
175 struct ktr_header kth;
176
177 p->p_traceflag |= KTRFAC_ACTIVE;
178 ktrinitheader(&kth, p, KTR_NAMEI);
179 kth.ktr_len = strlen(path);
180 kth.ktr_buf = path;
181
182 (void) ktrwrite(p, &kth);
183 p->p_traceflag &= ~KTRFAC_ACTIVE;
184 }
185
186 void
187 ktremul(p)
188 struct proc *p;
189 {
190 struct ktr_header kth;
191 char *emul = p->p_emul->e_name;
192
193 p->p_traceflag |= KTRFAC_ACTIVE;
194 ktrinitheader(&kth, p, KTR_EMUL);
195 kth.ktr_len = strlen(emul);
196 kth.ktr_buf = emul;
197
198 (void) ktrwrite(p, &kth);
199 p->p_traceflag &= ~KTRFAC_ACTIVE;
200 }
201
202 void
203 ktrgenio(p, fd, rw, iov, len, error)
204 struct proc *p;
205 int fd;
206 enum uio_rw rw;
207 struct iovec *iov;
208 int len, error;
209 {
210 struct ktr_header kth;
211 struct ktr_genio *ktp;
212 caddr_t cp;
213 int resid = len, cnt;
214 int buflen;
215
216 if (error)
217 return;
218
219 p->p_traceflag |= KTRFAC_ACTIVE;
220
221 buflen = min(PAGE_SIZE, len + sizeof(struct ktr_genio));
222
223 ktrinitheader(&kth, p, KTR_GENIO);
224 ktp = malloc(buflen, M_TEMP, M_WAITOK);
225 ktp->ktr_fd = fd;
226 ktp->ktr_rw = rw;
227
228 kth.ktr_buf = (caddr_t)ktp;
229
230 cp = (caddr_t)((char *)ktp + sizeof(struct ktr_genio));
231 buflen -= sizeof(struct ktr_genio);
232
233 while (resid > 0) {
234 if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
235 preempt(NULL);
236
237 cnt = min(iov->iov_len, buflen);
238 if (cnt > resid)
239 cnt = resid;
240 if (copyin(iov->iov_base, cp, cnt))
241 break;
242
243 kth.ktr_len = cnt + sizeof(struct ktr_genio);
244
245 if (__predict_false(ktrwrite(p, &kth) != 0))
246 break;
247
248 iov->iov_base = (caddr_t)iov->iov_base + cnt;
249 iov->iov_len -= cnt;
250
251 if (iov->iov_len == 0)
252 iov++;
253
254 resid -= cnt;
255 }
256
257 free(ktp, M_TEMP);
258 p->p_traceflag &= ~KTRFAC_ACTIVE;
259 }
260
261 void
262 ktrpsig(p, sig, action, mask, code)
263 struct proc *p;
264 int sig;
265 sig_t action;
266 sigset_t *mask;
267 int code;
268 {
269 struct ktr_header kth;
270 struct ktr_psig kp;
271
272 p->p_traceflag |= KTRFAC_ACTIVE;
273 ktrinitheader(&kth, p, KTR_PSIG);
274 kp.signo = (char)sig;
275 kp.action = action;
276 kp.mask = *mask;
277 kp.code = code;
278 kth.ktr_buf = (caddr_t)&kp;
279 kth.ktr_len = sizeof(struct ktr_psig);
280
281 (void) ktrwrite(p, &kth);
282 p->p_traceflag &= ~KTRFAC_ACTIVE;
283 }
284
285 void
286 ktrcsw(p, out, user)
287 struct proc *p;
288 int out, user;
289 {
290 struct ktr_header kth;
291 struct ktr_csw kc;
292
293 p->p_traceflag |= KTRFAC_ACTIVE;
294 ktrinitheader(&kth, p, KTR_CSW);
295 kc.out = out;
296 kc.user = user;
297 kth.ktr_buf = (caddr_t)&kc;
298 kth.ktr_len = sizeof(struct ktr_csw);
299
300 (void) ktrwrite(p, &kth);
301 p->p_traceflag &= ~KTRFAC_ACTIVE;
302 }
303
304 /* Interface and common routines */
305
306 int
307 ktrace_common (curp, ops, facs, pid, fp)
308 struct proc *curp;
309 int ops, facs, pid;
310 struct file *fp;
311 {
312 int ret = 0;
313 int error = 0;
314 int one = 1;
315 int descend;
316 struct proc *p;
317 struct pgrp *pg;
318
319 curp->p_traceflag |= KTRFAC_ACTIVE;
320 descend = ops & KTRFLAG_DESCEND;
321 facs = facs & ~((unsigned) KTRFAC_ROOT);
322
323 /*
324 * Clear all uses of the tracefile
325 */
326 if (KTROP(ops) == KTROP_CLEARFILE) {
327 proclist_lock_read();
328 for (p = LIST_FIRST(&allproc); p != NULL;
329 p = LIST_NEXT(p, p_list)) {
330 if (ktrsamefile(p->p_tracep, fp)) {
331 if (ktrcanset(curp, p))
332 ktrderef(p);
333 else
334 error = EPERM;
335 }
336 }
337 proclist_unlock_read();
338 goto done;
339 }
340
341 /*
342 * Mark fp non-blocking, to avoid problems from possible deadlocks.
343 */
344
345 if (fp != NULL) {
346 fp->f_flag |= FNONBLOCK;
347 (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&one, curp);
348 }
349
350 /*
351 * need something to (un)trace (XXX - why is this here?)
352 */
353 if (!facs) {
354 error = EINVAL;
355 goto done;
356 }
357 /*
358 * do it
359 */
360 if (pid < 0) {
361 /*
362 * by process group
363 */
364 pg = pgfind(-pid);
365 if (pg == NULL) {
366 error = ESRCH;
367 goto done;
368 }
369 for (p = LIST_FIRST(&pg->pg_members); p != NULL;
370 p = LIST_NEXT(p, p_pglist)) {
371 if (descend)
372 ret |= ktrsetchildren(curp, p, ops, facs, fp);
373 else
374 ret |= ktrops(curp, p, ops, facs, fp);
375 }
376
377 } else {
378 /*
379 * by pid
380 */
381 p = pfind(pid);
382 if (p == NULL) {
383 error = ESRCH;
384 goto done;
385 }
386 if (descend)
387 ret |= ktrsetchildren(curp, p, ops, facs, fp);
388 else
389 ret |= ktrops(curp, p, ops, facs, fp);
390 }
391 if (!ret)
392 error = EPERM;
393 done:
394 curp->p_traceflag &= ~KTRFAC_ACTIVE;
395 return (error);
396 }
397
398 /*
399 * ktrace system call
400 */
401 /* ARGSUSED */
402 int
403 sys_fktrace(curp, v, retval)
404 struct proc *curp;
405 void *v;
406 register_t *retval;
407 {
408 struct sys_fktrace_args /* {
409 syscallarg(int) fd;
410 syscallarg(int) ops;
411 syscallarg(int) facs;
412 syscallarg(int) pid;
413 } */ *uap = v;
414 struct file *fp = NULL;
415 struct filedesc *fdp = curp->p_fd;
416
417 if (((u_int)SCARG(uap, fd)) >= fdp->fd_nfiles ||
418 (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL ||
419 (fp->f_flag & FWRITE) == 0)
420 return (EBADF);
421
422 return ktrace_common(curp, SCARG(uap, ops),
423 SCARG(uap, facs), SCARG(uap, pid), fp);
424 }
425
426 /*
427 * ktrace system call
428 */
429 /* ARGSUSED */
430 int
431 sys_ktrace(curp, v, retval)
432 struct proc *curp;
433 void *v;
434 register_t *retval;
435 {
436 struct sys_ktrace_args /* {
437 syscallarg(const char *) fname;
438 syscallarg(int) ops;
439 syscallarg(int) facs;
440 syscallarg(int) pid;
441 } */ *uap = v;
442 struct vnode *vp = NULL;
443 struct file *fp = NULL;
444 int fd;
445 int ops = SCARG(uap, ops);
446 int error = 0;
447 struct nameidata nd;
448
449 ops = KTROP(ops) | (ops & KTRFLAG_DESCEND);
450
451 curp->p_traceflag |= KTRFAC_ACTIVE;
452 if (ops != KTROP_CLEAR) {
453 /*
454 * an operation which requires a file argument.
455 */
456 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname),
457 curp);
458 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
459 curp->p_traceflag &= ~KTRFAC_ACTIVE;
460 return (error);
461 }
462 vp = nd.ni_vp;
463 VOP_UNLOCK(vp, 0);
464 if (vp->v_type != VREG) {
465 (void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp);
466 curp->p_traceflag &= ~KTRFAC_ACTIVE;
467 return (EACCES);
468 }
469 /*
470 * XXX This uses up a file descriptor slot in the
471 * tracing process for the duration of this syscall.
472 * This is not expected to be a problem. If
473 * falloc(NULL, ...) DTRT we could skip that part, but
474 * that would require changing its interface to allow
475 * the caller to pass in a ucred..
476 *
477 * This will FILE_USE the fp it returns, if any.
478 * Keep it in use until we return.
479 */
480 if ((error = falloc(curp, &fp, &fd)) != 0)
481 goto done;
482
483 fp->f_flag = FWRITE|FAPPEND;
484 fp->f_type = DTYPE_VNODE;
485 fp->f_ops = &vnops;
486 fp->f_data = (caddr_t)vp;
487 vp = NULL;
488 }
489 error = ktrace_common(curp, SCARG(uap, ops), SCARG(uap, facs),
490 SCARG(uap, pid), fp);
491 done:
492 if (vp != NULL)
493 (void) vn_close(vp, FWRITE, curp->p_ucred, curp);
494 if (fp != NULL) {
495 FILE_UNUSE(fp, curp); /* release file */
496 fdrelease(curp, fd); /* release fd table slot */
497 }
498 return (error);
499 }
500
501 int
502 ktrops(curp, p, ops, facs, fp)
503 struct proc *p, *curp;
504 int ops, facs;
505 struct file *fp;
506 {
507
508 if (!ktrcanset(curp, p))
509 return (0);
510 if (KTROP(ops) == KTROP_SET) {
511 if (p->p_tracep != fp) {
512 /*
513 * if trace file already in use, relinquish
514 */
515 ktrderef(p);
516 p->p_tracep = fp;
517 ktradref(p);
518 }
519 p->p_traceflag |= facs;
520 if (curp->p_ucred->cr_uid == 0)
521 p->p_traceflag |= KTRFAC_ROOT;
522 } else {
523 /* KTROP_CLEAR */
524 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
525 /* no more tracing */
526 ktrderef(p);
527 }
528 }
529
530 /*
531 * Emit an emulation record, every time there is a ktrace
532 * change/attach request.
533 */
534 if (KTRPOINT(p, KTR_EMUL))
535 ktremul(p);
536
537 return (1);
538 }
539
540 int
541 ktrsetchildren(curp, top, ops, facs, fp)
542 struct proc *curp, *top;
543 int ops, facs;
544 struct file *fp;
545 {
546 struct proc *p;
547 int ret = 0;
548
549 p = top;
550 for (;;) {
551 ret |= ktrops(curp, p, ops, facs, fp);
552 /*
553 * If this process has children, descend to them next,
554 * otherwise do any siblings, and if done with this level,
555 * follow back up the tree (but not past top).
556 */
557 if (LIST_FIRST(&p->p_children) != NULL)
558 p = LIST_FIRST(&p->p_children);
559 else for (;;) {
560 if (p == top)
561 return (ret);
562 if (LIST_NEXT(p, p_sibling) != NULL) {
563 p = LIST_NEXT(p, p_sibling);
564 break;
565 }
566 p = p->p_pptr;
567 }
568 }
569 /*NOTREACHED*/
570 }
571
572 int
573 ktrwrite(p, kth)
574 struct proc *p;
575 struct ktr_header *kth;
576 {
577 struct uio auio;
578 struct iovec aiov[2];
579 int error, tries;
580 struct file *fp = p->p_tracep;
581
582 if (fp == NULL)
583 return 0;
584
585 auio.uio_iov = &aiov[0];
586 auio.uio_offset = 0;
587 auio.uio_segflg = UIO_SYSSPACE;
588 auio.uio_rw = UIO_WRITE;
589 aiov[0].iov_base = (caddr_t)kth;
590 aiov[0].iov_len = sizeof(struct ktr_header);
591 auio.uio_resid = sizeof(struct ktr_header);
592 auio.uio_iovcnt = 1;
593 auio.uio_procp = (struct proc *)0;
594 if (kth->ktr_len > 0) {
595 auio.uio_iovcnt++;
596 aiov[1].iov_base = kth->ktr_buf;
597 aiov[1].iov_len = kth->ktr_len;
598 auio.uio_resid += kth->ktr_len;
599 }
600
601 FILE_USE(fp);
602
603 tries = 0;
604 do {
605 error = (*fp->f_ops->fo_write)(fp, &fp->f_offset, &auio,
606 fp->f_cred, FOF_UPDATE_OFFSET);
607 tries++;
608 if (error == EWOULDBLOCK)
609 yield();
610 } while ((error == EWOULDBLOCK) && (tries < 3));
611 FILE_UNUSE(fp, NULL);
612
613 if (__predict_true(error == 0))
614 return (0);
615 /*
616 * If error encountered, give up tracing on this vnode. Don't report
617 * EPIPE as this can easily happen with fktrace()/ktruss.
618 */
619 if (error != EPIPE)
620 log(LOG_NOTICE,
621 "ktrace write failed, errno %d, tracing stopped\n",
622 error);
623 proclist_lock_read();
624 for (p = LIST_FIRST(&allproc); p != NULL; p = LIST_NEXT(p, p_list)) {
625 if (ktrsamefile(p->p_tracep, fp))
626 ktrderef(p);
627 }
628 proclist_unlock_read();
629
630 return (error);
631 }
632
633 /*
634 * Return true if caller has permission to set the ktracing state
635 * of target. Essentially, the target can't possess any
636 * more permissions than the caller. KTRFAC_ROOT signifies that
637 * root previously set the tracing status on the target process, and
638 * so, only root may further change it.
639 *
640 * TODO: check groups. use caller effective gid.
641 */
642 int
643 ktrcanset(callp, targetp)
644 struct proc *callp, *targetp;
645 {
646 struct pcred *caller = callp->p_cred;
647 struct pcred *target = targetp->p_cred;
648
649 if ((caller->pc_ucred->cr_uid == target->p_ruid &&
650 target->p_ruid == target->p_svuid &&
651 caller->p_rgid == target->p_rgid && /* XXX */
652 target->p_rgid == target->p_svgid &&
653 (targetp->p_traceflag & KTRFAC_ROOT) == 0) ||
654 caller->pc_ucred->cr_uid == 0)
655 return (1);
656
657 return (0);
658 }
659
660 #endif
661