kern_ktrace.c revision 1.53.2.8 1 /* $NetBSD: kern_ktrace.c,v 1.53.2.8 2002/11/11 22:13:43 nathanw Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)kern_ktrace.c 8.5 (Berkeley) 5/14/95
36 */
37
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: kern_ktrace.c,v 1.53.2.8 2002/11/11 22:13:43 nathanw Exp $");
40
41 #include "opt_ktrace.h"
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/proc.h>
46 #include <sys/file.h>
47 #include <sys/namei.h>
48 #include <sys/vnode.h>
49 #include <sys/ktrace.h>
50 #include <sys/malloc.h>
51 #include <sys/syslog.h>
52 #include <sys/filedesc.h>
53 #include <sys/ioctl.h>
54
55 #include <sys/mount.h>
56 #include <sys/sa.h>
57 #include <sys/syscallargs.h>
58
59 #ifdef KTRACE
60
/*
 * Prototypes for routines defined later in this file.
 */
int	ktrace_common(struct proc *curp, int ops, int facs, int pid,
	    struct file *fp);
void	ktrinitheader(struct ktr_header *kth, struct proc *p, int type);
int	ktrops(struct proc *curp, struct proc *p, int ops, int facs,
	    struct file *fp);
int	ktrsetchildren(struct proc *curp, struct proc *top, int ops,
	    int facs, struct file *fp);
int	ktrwrite(struct proc *p, struct ktr_header *kth);
int	ktrcanset(struct proc *callp, struct proc *targetp);
int	ktrsamefile(struct file *f1, struct file *f2);
69
70 /*
71 * "deep" compare of two files for the purposes of clearing a trace.
72 * Returns true if they're the same open file, or if they point at the
73 * same underlying vnode/socket.
74 */
75
76 int
77 ktrsamefile(struct file *f1, struct file *f2)
78 {
79 return ((f1 == f2) ||
80 ((f1 != NULL) && (f2 != NULL) &&
81 (f1->f_type == f2->f_type) &&
82 (f1->f_data == f2->f_data)));
83 }
84
85 void
86 ktrderef(struct proc *p)
87 {
88 struct file *fp = p->p_tracep;
89 p->p_traceflag = 0;
90 if (fp == NULL)
91 return;
92 FILE_USE(fp);
93
94 /*
95 * ktrace file descriptor can't be watched (are not visible to
96 * userspace), so no kqueue stuff here
97 */
98 closef(fp, NULL);
99
100 p->p_tracep = NULL;
101 }
102
103 void
104 ktradref(struct proc *p)
105 {
106 struct file *fp = p->p_tracep;
107
108 fp->f_count++;
109 }
110
111 void
112 ktrinitheader(struct ktr_header *kth, struct proc *p, int type)
113 {
114
115 memset(kth, 0, sizeof(*kth));
116 kth->ktr_type = type;
117 microtime(&kth->ktr_time);
118 kth->ktr_pid = p->p_pid;
119 memcpy(kth->ktr_comm, p->p_comm, MAXCOMLEN);
120 }
121
122 void
123 ktrsyscall(struct proc *p, register_t code, register_t args[])
124 {
125 struct ktr_header kth;
126 struct ktr_syscall *ktp;
127 register_t *argp;
128 int argsize;
129 size_t len;
130 u_int i;
131
132 argsize = p->p_emul->e_sysent[code].sy_narg * sizeof (register_t);
133 len = sizeof(struct ktr_syscall) + argsize;
134
135 p->p_traceflag |= KTRFAC_ACTIVE;
136 ktrinitheader(&kth, p, KTR_SYSCALL);
137 ktp = malloc(len, M_TEMP, M_WAITOK);
138 ktp->ktr_code = code;
139 ktp->ktr_argsize = argsize;
140 argp = (register_t *)((char *)ktp + sizeof(struct ktr_syscall));
141 for (i = 0; i < (argsize / sizeof(*argp)); i++)
142 *argp++ = args[i];
143 kth.ktr_buf = (caddr_t)ktp;
144 kth.ktr_len = len;
145 (void) ktrwrite(p, &kth);
146 free(ktp, M_TEMP);
147 p->p_traceflag &= ~KTRFAC_ACTIVE;
148 }
149
150 void
151 ktrsysret(struct proc *p, register_t code, int error, register_t retval)
152 {
153 struct ktr_header kth;
154 struct ktr_sysret ktp;
155
156 p->p_traceflag |= KTRFAC_ACTIVE;
157 ktrinitheader(&kth, p, KTR_SYSRET);
158 ktp.ktr_code = code;
159 ktp.ktr_eosys = 0; /* XXX unused */
160 ktp.ktr_error = error;
161 ktp.ktr_retval = retval; /* what about val2 ? */
162
163 kth.ktr_buf = (caddr_t)&ktp;
164 kth.ktr_len = sizeof(struct ktr_sysret);
165
166 (void) ktrwrite(p, &kth);
167 p->p_traceflag &= ~KTRFAC_ACTIVE;
168 }
169
170 void
171 ktrnamei(struct proc *p, char *path)
172 {
173 struct ktr_header kth;
174
175 p->p_traceflag |= KTRFAC_ACTIVE;
176 ktrinitheader(&kth, p, KTR_NAMEI);
177 kth.ktr_len = strlen(path);
178 kth.ktr_buf = path;
179
180 (void) ktrwrite(p, &kth);
181 p->p_traceflag &= ~KTRFAC_ACTIVE;
182 }
183
184 void
185 ktremul(struct proc *p)
186 {
187 struct ktr_header kth;
188 const char *emul = p->p_emul->e_name;
189
190 p->p_traceflag |= KTRFAC_ACTIVE;
191 ktrinitheader(&kth, p, KTR_EMUL);
192 kth.ktr_len = strlen(emul);
193 kth.ktr_buf = (caddr_t)emul;
194
195 (void) ktrwrite(p, &kth);
196 p->p_traceflag &= ~KTRFAC_ACTIVE;
197 }
198
199 void
200 ktrgenio(struct proc *p, int fd, enum uio_rw rw, struct iovec *iov,
201 int len, int error)
202 {
203 struct ktr_header kth;
204 struct ktr_genio *ktp;
205 caddr_t cp;
206 int resid = len, cnt;
207 int buflen;
208
209 if (error)
210 return;
211
212 p->p_traceflag |= KTRFAC_ACTIVE;
213
214 buflen = min(PAGE_SIZE, len + sizeof(struct ktr_genio));
215
216 ktrinitheader(&kth, p, KTR_GENIO);
217 ktp = malloc(buflen, M_TEMP, M_WAITOK);
218 ktp->ktr_fd = fd;
219 ktp->ktr_rw = rw;
220
221 kth.ktr_buf = (caddr_t)ktp;
222
223 cp = (caddr_t)((char *)ktp + sizeof(struct ktr_genio));
224 buflen -= sizeof(struct ktr_genio);
225
226 while (resid > 0) {
227 #if 0 /* XXX NJWLWP */
228 KDASSERT(p->p_cpu != NULL);
229 KDASSERT(p->p_cpu == curcpu());
230 #endif
231 /* XXX NJWLWP */
232 if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
233 preempt(NULL);
234
235 cnt = min(iov->iov_len, buflen);
236 if (cnt > resid)
237 cnt = resid;
238 if (copyin(iov->iov_base, cp, cnt))
239 break;
240
241 kth.ktr_len = cnt + sizeof(struct ktr_genio);
242
243 if (__predict_false(ktrwrite(p, &kth) != 0))
244 break;
245
246 iov->iov_base = (caddr_t)iov->iov_base + cnt;
247 iov->iov_len -= cnt;
248
249 if (iov->iov_len == 0)
250 iov++;
251
252 resid -= cnt;
253 }
254
255 free(ktp, M_TEMP);
256 p->p_traceflag &= ~KTRFAC_ACTIVE;
257 }
258
259 void
260 ktrpsig(struct proc *p, int sig, sig_t action, sigset_t *mask, int code)
261 {
262 struct ktr_header kth;
263 struct ktr_psig kp;
264
265 p->p_traceflag |= KTRFAC_ACTIVE;
266 ktrinitheader(&kth, p, KTR_PSIG);
267 kp.signo = (char)sig;
268 kp.action = action;
269 kp.mask = *mask;
270 kp.code = code;
271 kth.ktr_buf = (caddr_t)&kp;
272 kth.ktr_len = sizeof(struct ktr_psig);
273
274 (void) ktrwrite(p, &kth);
275 p->p_traceflag &= ~KTRFAC_ACTIVE;
276 }
277
278 void
279 ktrcsw(struct proc *p, int out, int user)
280 {
281 struct ktr_header kth;
282 struct ktr_csw kc;
283
284 p->p_traceflag |= KTRFAC_ACTIVE;
285 ktrinitheader(&kth, p, KTR_CSW);
286 kc.out = out;
287 kc.user = user;
288 kth.ktr_buf = (caddr_t)&kc;
289 kth.ktr_len = sizeof(struct ktr_csw);
290
291 (void) ktrwrite(p, &kth);
292 p->p_traceflag &= ~KTRFAC_ACTIVE;
293 }
294
295 void
296 ktruser(p, id, addr, len, ustr)
297 struct proc *p;
298 const char *id;
299 void *addr;
300 size_t len;
301 int ustr;
302 {
303 struct ktr_header kth;
304 struct ktr_user *ktp;
305 caddr_t user_dta;
306
307 p->p_traceflag |= KTRFAC_ACTIVE;
308 ktrinitheader(&kth, p, KTR_USER);
309 ktp = malloc(sizeof(struct ktr_user) + len, M_TEMP, M_WAITOK);
310 if (ustr) {
311 if (copyinstr(id, ktp->ktr_id, KTR_USER_MAXIDLEN, NULL) != 0)
312 ktp->ktr_id[0] = '\0';
313 } else
314 strncpy(ktp->ktr_id, id, KTR_USER_MAXIDLEN);
315 ktp->ktr_id[KTR_USER_MAXIDLEN-1] = '\0';
316
317 user_dta = (caddr_t) ((char *)ktp + sizeof(struct ktr_user));
318 if (copyin(addr, (void *) user_dta, len) != 0)
319 len = 0;
320
321 kth.ktr_buf = (void *)ktp;
322 kth.ktr_len = sizeof(struct ktr_user) + len;
323 (void) ktrwrite(p, &kth);
324
325 free(ktp, M_TEMP);
326 p->p_traceflag &= ~KTRFAC_ACTIVE;
327
328 }
329
330 /* Interface and common routines */
331
332 int
333 ktrace_common(struct proc *curp, int ops, int facs, int pid, struct file *fp)
334 {
335 int ret = 0;
336 int error = 0;
337 int one = 1;
338 int descend;
339 struct proc *p;
340 struct pgrp *pg;
341
342 curp->p_traceflag |= KTRFAC_ACTIVE;
343 descend = ops & KTRFLAG_DESCEND;
344 facs = facs & ~((unsigned) KTRFAC_ROOT);
345
346 /*
347 * Clear all uses of the tracefile
348 */
349 if (KTROP(ops) == KTROP_CLEARFILE) {
350 proclist_lock_read();
351 for (p = LIST_FIRST(&allproc); p != NULL;
352 p = LIST_NEXT(p, p_list)) {
353 if (ktrsamefile(p->p_tracep, fp)) {
354 if (ktrcanset(curp, p))
355 ktrderef(p);
356 else
357 error = EPERM;
358 }
359 }
360 proclist_unlock_read();
361 goto done;
362 }
363
364 /*
365 * Mark fp non-blocking, to avoid problems from possible deadlocks.
366 */
367
368 if (fp != NULL) {
369 fp->f_flag |= FNONBLOCK;
370 (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&one, curp);
371 }
372
373 /*
374 * need something to (un)trace (XXX - why is this here?)
375 */
376 if (!facs) {
377 error = EINVAL;
378 goto done;
379 }
380 /*
381 * do it
382 */
383 if (pid < 0) {
384 /*
385 * by process group
386 */
387 pg = pgfind(-pid);
388 if (pg == NULL) {
389 error = ESRCH;
390 goto done;
391 }
392 for (p = LIST_FIRST(&pg->pg_members); p != NULL;
393 p = LIST_NEXT(p, p_pglist)) {
394 if (descend)
395 ret |= ktrsetchildren(curp, p, ops, facs, fp);
396 else
397 ret |= ktrops(curp, p, ops, facs, fp);
398 }
399
400 } else {
401 /*
402 * by pid
403 */
404 p = pfind(pid);
405 if (p == NULL) {
406 error = ESRCH;
407 goto done;
408 }
409 if (descend)
410 ret |= ktrsetchildren(curp, p, ops, facs, fp);
411 else
412 ret |= ktrops(curp, p, ops, facs, fp);
413 }
414 if (!ret)
415 error = EPERM;
416 done:
417 curp->p_traceflag &= ~KTRFAC_ACTIVE;
418 return (error);
419 }
420
421 /*
422 * ktrace system call
423 */
424 /* ARGSUSED */
425 int
426 sys_fktrace(struct lwp *l, void *v, register_t *retval)
427 {
428 struct sys_fktrace_args /* {
429 syscallarg(int) fd;
430 syscallarg(int) ops;
431 syscallarg(int) facs;
432 syscallarg(int) pid;
433 } */ *uap = v;
434 struct proc *curp = l->l_proc;
435 struct file *fp = NULL;
436 struct filedesc *fdp = curp->p_fd;
437
438 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
439 return (EBADF);
440
441 if ((fp->f_flag & FWRITE) == 0)
442 return (EBADF);
443
444 return ktrace_common(curp, SCARG(uap, ops),
445 SCARG(uap, facs), SCARG(uap, pid), fp);
446 }
447
448 /*
449 * ktrace system call
450 */
451 /* ARGSUSED */
452 int
453 sys_ktrace(struct lwp *l, void *v, register_t *retval)
454 {
455 struct sys_ktrace_args /* {
456 syscallarg(const char *) fname;
457 syscallarg(int) ops;
458 syscallarg(int) facs;
459 syscallarg(int) pid;
460 } */ *uap = v;
461 struct proc *curp = l->l_proc;
462 struct vnode *vp = NULL;
463 struct file *fp = NULL;
464 int fd;
465 int ops = SCARG(uap, ops);
466 int error = 0;
467 struct nameidata nd;
468
469 ops = KTROP(ops) | (ops & KTRFLAG_DESCEND);
470
471 curp->p_traceflag |= KTRFAC_ACTIVE;
472 if (ops != KTROP_CLEAR) {
473 /*
474 * an operation which requires a file argument.
475 */
476 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname),
477 curp);
478 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
479 curp->p_traceflag &= ~KTRFAC_ACTIVE;
480 return (error);
481 }
482 vp = nd.ni_vp;
483 VOP_UNLOCK(vp, 0);
484 if (vp->v_type != VREG) {
485 (void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp);
486 curp->p_traceflag &= ~KTRFAC_ACTIVE;
487 return (EACCES);
488 }
489 /*
490 * XXX This uses up a file descriptor slot in the
491 * tracing process for the duration of this syscall.
492 * This is not expected to be a problem. If
493 * falloc(NULL, ...) DTRT we could skip that part, but
494 * that would require changing its interface to allow
495 * the caller to pass in a ucred..
496 *
497 * This will FILE_USE the fp it returns, if any.
498 * Keep it in use until we return.
499 */
500 if ((error = falloc(curp, &fp, &fd)) != 0)
501 goto done;
502
503 fp->f_flag = FWRITE|FAPPEND;
504 fp->f_type = DTYPE_VNODE;
505 fp->f_ops = &vnops;
506 fp->f_data = (caddr_t)vp;
507 FILE_SET_MATURE(fp);
508 vp = NULL;
509 }
510 error = ktrace_common(curp, SCARG(uap, ops), SCARG(uap, facs),
511 SCARG(uap, pid), fp);
512 done:
513 if (vp != NULL)
514 (void) vn_close(vp, FWRITE, curp->p_ucred, curp);
515 if (fp != NULL) {
516 FILE_UNUSE(fp, curp); /* release file */
517 fdrelease(curp, fd); /* release fd table slot */
518 }
519 return (error);
520 }
521
522 int
523 ktrops(struct proc *curp, struct proc *p, int ops, int facs, struct file *fp)
524 {
525
526 if (!ktrcanset(curp, p))
527 return (0);
528 if (KTROP(ops) == KTROP_SET) {
529 if (p->p_tracep != fp) {
530 /*
531 * if trace file already in use, relinquish
532 */
533 ktrderef(p);
534 p->p_tracep = fp;
535 ktradref(p);
536 }
537 p->p_traceflag |= facs;
538 if (curp->p_ucred->cr_uid == 0)
539 p->p_traceflag |= KTRFAC_ROOT;
540 } else {
541 /* KTROP_CLEAR */
542 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
543 /* no more tracing */
544 ktrderef(p);
545 }
546 }
547
548 /*
549 * Emit an emulation record, every time there is a ktrace
550 * change/attach request.
551 */
552 if (KTRPOINT(p, KTR_EMUL))
553 ktremul(p);
554 #ifdef __HAVE_SYSCALL_INTERN
555 (*p->p_emul->e_syscall_intern)(p);
556 #endif
557
558 return (1);
559 }
560
561 int
562 ktrsetchildren(struct proc *curp, struct proc *top, int ops, int facs,
563 struct file *fp)
564 {
565 struct proc *p;
566 int ret = 0;
567
568 p = top;
569 for (;;) {
570 ret |= ktrops(curp, p, ops, facs, fp);
571 /*
572 * If this process has children, descend to them next,
573 * otherwise do any siblings, and if done with this level,
574 * follow back up the tree (but not past top).
575 */
576 if (LIST_FIRST(&p->p_children) != NULL)
577 p = LIST_FIRST(&p->p_children);
578 else for (;;) {
579 if (p == top)
580 return (ret);
581 if (LIST_NEXT(p, p_sibling) != NULL) {
582 p = LIST_NEXT(p, p_sibling);
583 break;
584 }
585 p = p->p_pptr;
586 }
587 }
588 /*NOTREACHED*/
589 }
590
591 int
592 ktrwrite(struct proc *p, struct ktr_header *kth)
593 {
594 struct uio auio;
595 struct iovec aiov[2];
596 int error, tries;
597 struct file *fp = p->p_tracep;
598
599 if (fp == NULL)
600 return 0;
601
602 auio.uio_iov = &aiov[0];
603 auio.uio_offset = 0;
604 auio.uio_segflg = UIO_SYSSPACE;
605 auio.uio_rw = UIO_WRITE;
606 aiov[0].iov_base = (caddr_t)kth;
607 aiov[0].iov_len = sizeof(struct ktr_header);
608 auio.uio_resid = sizeof(struct ktr_header);
609 auio.uio_iovcnt = 1;
610 auio.uio_procp = (struct proc *)0;
611 if (kth->ktr_len > 0) {
612 auio.uio_iovcnt++;
613 aiov[1].iov_base = kth->ktr_buf;
614 aiov[1].iov_len = kth->ktr_len;
615 auio.uio_resid += kth->ktr_len;
616 }
617
618 FILE_USE(fp);
619
620 tries = 0;
621 do {
622 error = (*fp->f_ops->fo_write)(fp, &fp->f_offset, &auio,
623 fp->f_cred, FOF_UPDATE_OFFSET);
624 tries++;
625 if (error == EWOULDBLOCK)
626 yield();
627 } while ((error == EWOULDBLOCK) && (tries < 3));
628 FILE_UNUSE(fp, NULL);
629
630 if (__predict_true(error == 0))
631 return (0);
632 /*
633 * If error encountered, give up tracing on this vnode. Don't report
634 * EPIPE as this can easily happen with fktrace()/ktruss.
635 */
636 if (error != EPIPE)
637 log(LOG_NOTICE,
638 "ktrace write failed, errno %d, tracing stopped\n",
639 error);
640 proclist_lock_read();
641 for (p = LIST_FIRST(&allproc); p != NULL; p = LIST_NEXT(p, p_list)) {
642 if (ktrsamefile(p->p_tracep, fp))
643 ktrderef(p);
644 }
645 proclist_unlock_read();
646
647 return (error);
648 }
649
650 /*
651 * Return true if caller has permission to set the ktracing state
652 * of target. Essentially, the target can't possess any
653 * more permissions than the caller. KTRFAC_ROOT signifies that
654 * root previously set the tracing status on the target process, and
655 * so, only root may further change it.
656 *
657 * TODO: check groups. use caller effective gid.
658 */
659 int
660 ktrcanset(struct proc *callp, struct proc *targetp)
661 {
662 struct pcred *caller = callp->p_cred;
663 struct pcred *target = targetp->p_cred;
664
665 if ((caller->pc_ucred->cr_uid == target->p_ruid &&
666 target->p_ruid == target->p_svuid &&
667 caller->p_rgid == target->p_rgid && /* XXX */
668 target->p_rgid == target->p_svgid &&
669 (targetp->p_traceflag & KTRFAC_ROOT) == 0 &&
670 (targetp->p_flag & P_SUGID) == 0) ||
671 caller->pc_ucred->cr_uid == 0)
672 return (1);
673
674 return (0);
675 }
676 #endif /* KTRACE */
677
678 /*
679 * Put user defined entry to ktrace records.
680 */
681 int
682 sys_utrace(l, v, retval)
683 struct lwp *l;
684 void *v;
685 register_t *retval;
686 {
687 #ifdef KTRACE
688 struct sys_utrace_args /* {
689 syscallarg(const char *) label;
690 syscallarg(void *) addr;
691 syscallarg(size_t) len;
692 } */ *uap = v;
693 struct proc *p = l->l_proc;
694 if (!KTRPOINT(p, KTR_USER))
695 return (0);
696
697 if (SCARG(uap, len) > KTR_USER_MAXLEN)
698 return (EINVAL);
699
700 ktruser(p, SCARG(uap, label), SCARG(uap, addr), SCARG(uap, len), 1);
701
702 return (0);
703 #else /* !KTRACE */
704 return ENOSYS;
705 #endif /* KTRACE */
706 }
707