kern_ktrace.c revision 1.55.8.2 1 /* $NetBSD: kern_ktrace.c,v 1.55.8.2 2002/07/15 10:36:32 gehenna Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)kern_ktrace.c 8.5 (Berkeley) 5/14/95
36 */
37
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: kern_ktrace.c,v 1.55.8.2 2002/07/15 10:36:32 gehenna Exp $");
40
41 #include "opt_ktrace.h"
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/proc.h>
46 #include <sys/file.h>
47 #include <sys/namei.h>
48 #include <sys/vnode.h>
49 #include <sys/ktrace.h>
50 #include <sys/malloc.h>
51 #include <sys/syslog.h>
52 #include <sys/filedesc.h>
53 #include <sys/ioctl.h>
54
55 #include <sys/mount.h>
56 #include <sys/syscallargs.h>
57
58 #ifdef KTRACE
59
/*
 * Forward declarations for routines defined later in this file.
 */
int	ktrace_common(struct proc *curp, int ops, int facs, int pid,
	    struct file *fp);
void	ktrinitheader(struct ktr_header *kth, struct proc *p, int type);
int	ktrops(struct proc *curp, struct proc *p, int ops, int facs,
	    struct file *fp);
int	ktrsetchildren(struct proc *curp, struct proc *top, int ops,
	    int facs, struct file *fp);
int	ktrwrite(struct proc *p, struct ktr_header *kth);
int	ktrcanset(struct proc *callp, struct proc *targetp);
int	ktrsamefile(struct file *f1, struct file *f2);
68
69 /*
70 * "deep" compare of two files for the purposes of clearing a trace.
71 * Returns true if they're the same open file, or if they point at the
72 * same underlying vnode/socket.
73 */
74
75 int
76 ktrsamefile(struct file *f1, struct file *f2)
77 {
78 return ((f1 == f2) ||
79 ((f1 != NULL) && (f2 != NULL) &&
80 (f1->f_type == f2->f_type) &&
81 (f1->f_data == f2->f_data)));
82 }
83
84 void
85 ktrderef(struct proc *p)
86 {
87 struct file *fp = p->p_tracep;
88 p->p_traceflag = 0;
89 if (fp == NULL)
90 return;
91 FILE_USE(fp);
92 closef(fp, NULL);
93
94 p->p_tracep = NULL;
95 }
96
97 void
98 ktradref(struct proc *p)
99 {
100 struct file *fp = p->p_tracep;
101
102 fp->f_count++;
103 }
104
105 void
106 ktrinitheader(struct ktr_header *kth, struct proc *p, int type)
107 {
108
109 memset(kth, 0, sizeof(*kth));
110 kth->ktr_type = type;
111 microtime(&kth->ktr_time);
112 kth->ktr_pid = p->p_pid;
113 memcpy(kth->ktr_comm, p->p_comm, MAXCOMLEN);
114 }
115
116 void
117 ktrsyscall(struct proc *p, register_t code, register_t args[])
118 {
119 struct ktr_header kth;
120 struct ktr_syscall *ktp;
121 register_t *argp;
122 int argsize;
123 size_t len;
124 int i;
125
126 argsize = p->p_emul->e_sysent[code].sy_narg * sizeof (register_t);
127 len = sizeof(struct ktr_syscall) + argsize;
128
129 p->p_traceflag |= KTRFAC_ACTIVE;
130 ktrinitheader(&kth, p, KTR_SYSCALL);
131 ktp = malloc(len, M_TEMP, M_WAITOK);
132 ktp->ktr_code = code;
133 ktp->ktr_argsize = argsize;
134 argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall));
135 for (i = 0; i < (argsize / sizeof(*argp)); i++)
136 *argp++ = args[i];
137 kth.ktr_buf = (caddr_t)ktp;
138 kth.ktr_len = len;
139 (void) ktrwrite(p, &kth);
140 free(ktp, M_TEMP);
141 p->p_traceflag &= ~KTRFAC_ACTIVE;
142 }
143
144 void
145 ktrsysret(struct proc *p, register_t code, int error, register_t retval)
146 {
147 struct ktr_header kth;
148 struct ktr_sysret ktp;
149
150 p->p_traceflag |= KTRFAC_ACTIVE;
151 ktrinitheader(&kth, p, KTR_SYSRET);
152 ktp.ktr_code = code;
153 ktp.ktr_eosys = 0; /* XXX unused */
154 ktp.ktr_error = error;
155 ktp.ktr_retval = retval; /* what about val2 ? */
156
157 kth.ktr_buf = (caddr_t)&ktp;
158 kth.ktr_len = sizeof(struct ktr_sysret);
159
160 (void) ktrwrite(p, &kth);
161 p->p_traceflag &= ~KTRFAC_ACTIVE;
162 }
163
164 void
165 ktrnamei(struct proc *p, char *path)
166 {
167 struct ktr_header kth;
168
169 p->p_traceflag |= KTRFAC_ACTIVE;
170 ktrinitheader(&kth, p, KTR_NAMEI);
171 kth.ktr_len = strlen(path);
172 kth.ktr_buf = path;
173
174 (void) ktrwrite(p, &kth);
175 p->p_traceflag &= ~KTRFAC_ACTIVE;
176 }
177
178 void
179 ktremul(struct proc *p)
180 {
181 struct ktr_header kth;
182 const char *emul = p->p_emul->e_name;
183
184 p->p_traceflag |= KTRFAC_ACTIVE;
185 ktrinitheader(&kth, p, KTR_EMUL);
186 kth.ktr_len = strlen(emul);
187 kth.ktr_buf = (caddr_t)emul;
188
189 (void) ktrwrite(p, &kth);
190 p->p_traceflag &= ~KTRFAC_ACTIVE;
191 }
192
193 void
194 ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov,
195 int len, int error)
196 {
197 struct ktr_header kth;
198 struct ktr_genio *ktp;
199 caddr_t cp;
200 int resid = len, cnt;
201 int buflen;
202
203 if (error)
204 return;
205
206 p->p_traceflag |= KTRFAC_ACTIVE;
207
208 buflen = min(PAGE_SIZE, len + sizeof(struct ktr_genio));
209
210 ktrinitheader(&kth, p, KTR_GENIO);
211 ktp = malloc(buflen, M_TEMP, M_WAITOK);
212 ktp->ktr_fd = fd;
213 ktp->ktr_rw = rw;
214
215 kth.ktr_buf = (caddr_t)ktp;
216
217 cp = (caddr_t)((char *)ktp + sizeof(struct ktr_genio));
218 buflen -= sizeof(struct ktr_genio);
219
220 while (resid > 0) {
221 KDASSERT(p->p_cpu != NULL);
222 KDASSERT(p->p_cpu == curcpu());
223 if (p->p_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
224 preempt(NULL);
225
226 cnt = min(iov->iov_len, buflen);
227 if (cnt > resid)
228 cnt = resid;
229 if (copyin(iov->iov_base, cp, cnt))
230 break;
231
232 kth.ktr_len = cnt + sizeof(struct ktr_genio);
233
234 if (__predict_false(ktrwrite(p, &kth) != 0))
235 break;
236
237 iov->iov_base = (caddr_t)iov->iov_base + cnt;
238 iov->iov_len -= cnt;
239
240 if (iov->iov_len == 0)
241 iov++;
242
243 resid -= cnt;
244 }
245
246 free(ktp, M_TEMP);
247 p->p_traceflag &= ~KTRFAC_ACTIVE;
248 }
249
250 void
251 ktrpsig(struct proc *p, int sig, sig_t action, sigset_t *mask, int code)
252 {
253 struct ktr_header kth;
254 struct ktr_psig kp;
255
256 p->p_traceflag |= KTRFAC_ACTIVE;
257 ktrinitheader(&kth, p, KTR_PSIG);
258 kp.signo = (char)sig;
259 kp.action = action;
260 kp.mask = *mask;
261 kp.code = code;
262 kth.ktr_buf = (caddr_t)&kp;
263 kth.ktr_len = sizeof(struct ktr_psig);
264
265 (void) ktrwrite(p, &kth);
266 p->p_traceflag &= ~KTRFAC_ACTIVE;
267 }
268
269 void
270 ktrcsw(struct proc *p, int out, int user)
271 {
272 struct ktr_header kth;
273 struct ktr_csw kc;
274
275 p->p_traceflag |= KTRFAC_ACTIVE;
276 ktrinitheader(&kth, p, KTR_CSW);
277 kc.out = out;
278 kc.user = user;
279 kth.ktr_buf = (caddr_t)&kc;
280 kth.ktr_len = sizeof(struct ktr_csw);
281
282 (void) ktrwrite(p, &kth);
283 p->p_traceflag &= ~KTRFAC_ACTIVE;
284 }
285
286 void
287 ktruser(p, id, addr, len, ustr)
288 struct proc *p;
289 const char *id;
290 void *addr;
291 size_t len;
292 int ustr;
293 {
294 struct ktr_header kth;
295 struct ktr_user *ktp;
296 caddr_t user_dta;
297
298 p->p_traceflag |= KTRFAC_ACTIVE;
299 ktrinitheader(&kth, p, KTR_USER);
300 ktp = malloc(sizeof(struct ktr_user) + len, M_TEMP, M_WAITOK);
301 if (ustr) {
302 if (copyinstr(id, ktp->ktr_id, KTR_USER_MAXIDLEN, NULL) != 0)
303 ktp->ktr_id[0] = '\0';
304 } else
305 strncpy(ktp->ktr_id, id, KTR_USER_MAXIDLEN);
306 ktp->ktr_id[KTR_USER_MAXIDLEN-1] = '\0';
307
308 user_dta = (caddr_t) ((char *)ktp + sizeof(struct ktr_user));
309 if (copyin(addr, (void *) user_dta, len) != 0)
310 len = 0;
311
312 kth.ktr_buf = (void *)ktp;
313 kth.ktr_len = sizeof(struct ktr_user) + len;
314 (void) ktrwrite(p, &kth);
315
316 free(ktp, M_TEMP);
317 p->p_traceflag &= ~KTRFAC_ACTIVE;
318
319 }
320
321 /* Interface and common routines */
322
323 int
324 ktrace_common(struct proc *curp, int ops, int facs, int pid, struct file *fp)
325 {
326 int ret = 0;
327 int error = 0;
328 int one = 1;
329 int descend;
330 struct proc *p;
331 struct pgrp *pg;
332
333 curp->p_traceflag |= KTRFAC_ACTIVE;
334 descend = ops & KTRFLAG_DESCEND;
335 facs = facs & ~((unsigned) KTRFAC_ROOT);
336
337 /*
338 * Clear all uses of the tracefile
339 */
340 if (KTROP(ops) == KTROP_CLEARFILE) {
341 proclist_lock_read();
342 for (p = LIST_FIRST(&allproc); p != NULL;
343 p = LIST_NEXT(p, p_list)) {
344 if (ktrsamefile(p->p_tracep, fp)) {
345 if (ktrcanset(curp, p))
346 ktrderef(p);
347 else
348 error = EPERM;
349 }
350 }
351 proclist_unlock_read();
352 goto done;
353 }
354
355 /*
356 * Mark fp non-blocking, to avoid problems from possible deadlocks.
357 */
358
359 if (fp != NULL) {
360 fp->f_flag |= FNONBLOCK;
361 (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&one, curp);
362 }
363
364 /*
365 * need something to (un)trace (XXX - why is this here?)
366 */
367 if (!facs) {
368 error = EINVAL;
369 goto done;
370 }
371 /*
372 * do it
373 */
374 if (pid < 0) {
375 /*
376 * by process group
377 */
378 pg = pgfind(-pid);
379 if (pg == NULL) {
380 error = ESRCH;
381 goto done;
382 }
383 for (p = LIST_FIRST(&pg->pg_members); p != NULL;
384 p = LIST_NEXT(p, p_pglist)) {
385 if (descend)
386 ret |= ktrsetchildren(curp, p, ops, facs, fp);
387 else
388 ret |= ktrops(curp, p, ops, facs, fp);
389 }
390
391 } else {
392 /*
393 * by pid
394 */
395 p = pfind(pid);
396 if (p == NULL) {
397 error = ESRCH;
398 goto done;
399 }
400 if (descend)
401 ret |= ktrsetchildren(curp, p, ops, facs, fp);
402 else
403 ret |= ktrops(curp, p, ops, facs, fp);
404 }
405 if (!ret)
406 error = EPERM;
407 done:
408 curp->p_traceflag &= ~KTRFAC_ACTIVE;
409 return (error);
410 }
411
412 /*
413 * ktrace system call
414 */
415 /* ARGSUSED */
416 int
417 sys_fktrace(struct proc *curp, void *v, register_t *retval)
418 {
419 struct sys_fktrace_args /* {
420 syscallarg(int) fd;
421 syscallarg(int) ops;
422 syscallarg(int) facs;
423 syscallarg(int) pid;
424 } */ *uap = v;
425 struct file *fp = NULL;
426 struct filedesc *fdp = curp->p_fd;
427
428 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
429 return (EBADF);
430
431 if ((fp->f_flag & FWRITE) == 0)
432 return (EBADF);
433
434 return ktrace_common(curp, SCARG(uap, ops),
435 SCARG(uap, facs), SCARG(uap, pid), fp);
436 }
437
438 /*
439 * ktrace system call
440 */
441 /* ARGSUSED */
442 int
443 sys_ktrace(struct proc *curp, void *v, register_t *retval)
444 {
445 struct sys_ktrace_args /* {
446 syscallarg(const char *) fname;
447 syscallarg(int) ops;
448 syscallarg(int) facs;
449 syscallarg(int) pid;
450 } */ *uap = v;
451 struct vnode *vp = NULL;
452 struct file *fp = NULL;
453 int fd;
454 int ops = SCARG(uap, ops);
455 int error = 0;
456 struct nameidata nd;
457
458 ops = KTROP(ops) | (ops & KTRFLAG_DESCEND);
459
460 curp->p_traceflag |= KTRFAC_ACTIVE;
461 if (ops != KTROP_CLEAR) {
462 /*
463 * an operation which requires a file argument.
464 */
465 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname),
466 curp);
467 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
468 curp->p_traceflag &= ~KTRFAC_ACTIVE;
469 return (error);
470 }
471 vp = nd.ni_vp;
472 VOP_UNLOCK(vp, 0);
473 if (vp->v_type != VREG) {
474 (void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp);
475 curp->p_traceflag &= ~KTRFAC_ACTIVE;
476 return (EACCES);
477 }
478 /*
479 * XXX This uses up a file descriptor slot in the
480 * tracing process for the duration of this syscall.
481 * This is not expected to be a problem. If
482 * falloc(NULL, ...) DTRT we could skip that part, but
483 * that would require changing its interface to allow
484 * the caller to pass in a ucred..
485 *
486 * This will FILE_USE the fp it returns, if any.
487 * Keep it in use until we return.
488 */
489 if ((error = falloc(curp, &fp, &fd)) != 0)
490 goto done;
491
492 fp->f_flag = FWRITE|FAPPEND;
493 fp->f_type = DTYPE_VNODE;
494 fp->f_ops = &vnops;
495 fp->f_data = (caddr_t)vp;
496 FILE_SET_MATURE(fp);
497 vp = NULL;
498 }
499 error = ktrace_common(curp, SCARG(uap, ops), SCARG(uap, facs),
500 SCARG(uap, pid), fp);
501 done:
502 if (vp != NULL)
503 (void) vn_close(vp, FWRITE, curp->p_ucred, curp);
504 if (fp != NULL) {
505 FILE_UNUSE(fp, curp); /* release file */
506 fdrelease(curp, fd); /* release fd table slot */
507 }
508 return (error);
509 }
510
511 int
512 ktrops(struct proc *curp, struct proc *p, int ops, int facs, struct file *fp)
513 {
514
515 if (!ktrcanset(curp, p))
516 return (0);
517 if (KTROP(ops) == KTROP_SET) {
518 if (p->p_tracep != fp) {
519 /*
520 * if trace file already in use, relinquish
521 */
522 ktrderef(p);
523 p->p_tracep = fp;
524 ktradref(p);
525 }
526 p->p_traceflag |= facs;
527 if (curp->p_ucred->cr_uid == 0)
528 p->p_traceflag |= KTRFAC_ROOT;
529 } else {
530 /* KTROP_CLEAR */
531 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
532 /* no more tracing */
533 ktrderef(p);
534 }
535 }
536
537 /*
538 * Emit an emulation record, every time there is a ktrace
539 * change/attach request.
540 */
541 if (KTRPOINT(p, KTR_EMUL))
542 ktremul(p);
543 #ifdef __HAVE_SYSCALL_INTERN
544 (*p->p_emul->e_syscall_intern)(p);
545 #endif
546
547 return (1);
548 }
549
550 int
551 ktrsetchildren(struct proc *curp, struct proc *top, int ops, int facs,
552 struct file *fp)
553 {
554 struct proc *p;
555 int ret = 0;
556
557 p = top;
558 for (;;) {
559 ret |= ktrops(curp, p, ops, facs, fp);
560 /*
561 * If this process has children, descend to them next,
562 * otherwise do any siblings, and if done with this level,
563 * follow back up the tree (but not past top).
564 */
565 if (LIST_FIRST(&p->p_children) != NULL)
566 p = LIST_FIRST(&p->p_children);
567 else for (;;) {
568 if (p == top)
569 return (ret);
570 if (LIST_NEXT(p, p_sibling) != NULL) {
571 p = LIST_NEXT(p, p_sibling);
572 break;
573 }
574 p = p->p_pptr;
575 }
576 }
577 /*NOTREACHED*/
578 }
579
580 int
581 ktrwrite(struct proc *p, struct ktr_header *kth)
582 {
583 struct uio auio;
584 struct iovec aiov[2];
585 int error, tries;
586 struct file *fp = p->p_tracep;
587
588 if (fp == NULL)
589 return 0;
590
591 auio.uio_iov = &aiov[0];
592 auio.uio_offset = 0;
593 auio.uio_segflg = UIO_SYSSPACE;
594 auio.uio_rw = UIO_WRITE;
595 aiov[0].iov_base = (caddr_t)kth;
596 aiov[0].iov_len = sizeof(struct ktr_header);
597 auio.uio_resid = sizeof(struct ktr_header);
598 auio.uio_iovcnt = 1;
599 auio.uio_procp = (struct proc *)0;
600 if (kth->ktr_len > 0) {
601 auio.uio_iovcnt++;
602 aiov[1].iov_base = kth->ktr_buf;
603 aiov[1].iov_len = kth->ktr_len;
604 auio.uio_resid += kth->ktr_len;
605 }
606
607 FILE_USE(fp);
608
609 tries = 0;
610 do {
611 error = (*fp->f_ops->fo_write)(fp, &fp->f_offset, &auio,
612 fp->f_cred, FOF_UPDATE_OFFSET);
613 tries++;
614 if (error == EWOULDBLOCK)
615 yield();
616 } while ((error == EWOULDBLOCK) && (tries < 3));
617 FILE_UNUSE(fp, NULL);
618
619 if (__predict_true(error == 0))
620 return (0);
621 /*
622 * If error encountered, give up tracing on this vnode. Don't report
623 * EPIPE as this can easily happen with fktrace()/ktruss.
624 */
625 if (error != EPIPE)
626 log(LOG_NOTICE,
627 "ktrace write failed, errno %d, tracing stopped\n",
628 error);
629 proclist_lock_read();
630 for (p = LIST_FIRST(&allproc); p != NULL; p = LIST_NEXT(p, p_list)) {
631 if (ktrsamefile(p->p_tracep, fp))
632 ktrderef(p);
633 }
634 proclist_unlock_read();
635
636 return (error);
637 }
638
639 /*
640 * Return true if caller has permission to set the ktracing state
641 * of target. Essentially, the target can't possess any
642 * more permissions than the caller. KTRFAC_ROOT signifies that
643 * root previously set the tracing status on the target process, and
644 * so, only root may further change it.
645 *
646 * TODO: check groups. use caller effective gid.
647 */
648 int
649 ktrcanset(struct proc *callp, struct proc *targetp)
650 {
651 struct pcred *caller = callp->p_cred;
652 struct pcred *target = targetp->p_cred;
653
654 if ((caller->pc_ucred->cr_uid == target->p_ruid &&
655 target->p_ruid == target->p_svuid &&
656 caller->p_rgid == target->p_rgid && /* XXX */
657 target->p_rgid == target->p_svgid &&
658 (targetp->p_traceflag & KTRFAC_ROOT) == 0 &&
659 (targetp->p_flag & P_SUGID) == 0) ||
660 caller->pc_ucred->cr_uid == 0)
661 return (1);
662
663 return (0);
664 }
665 #endif /* KTRACE */
666
667 /*
668 * Put user defined entry to ktrace records.
669 */
670 int
671 sys_utrace(p, v, retval)
672 struct proc *p;
673 void *v;
674 register_t *retval;
675 {
676 #ifdef KTRACE
677 struct sys_utrace_args /* {
678 syscallarg(const char *) label;
679 syscallarg(void *) addr;
680 syscallarg(size_t) len;
681 } */ *uap = v;
682
683 if (!KTRPOINT(p, KTR_USER))
684 return (0);
685
686 if (SCARG(uap, len) > KTR_USER_MAXLEN)
687 return (EINVAL);
688
689 ktruser(p, SCARG(uap, label), SCARG(uap, addr), SCARG(uap, len), 1);
690
691 return (0);
692 #else /* !KTRACE */
693 return ENOSYS;
694 #endif /* KTRACE */
695 }
696