kern_ktrace.c revision 1.74.2.9 1 /* $NetBSD: kern_ktrace.c,v 1.74.2.9 2004/10/17 07:44:37 skrll Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)kern_ktrace.c 8.5 (Berkeley) 5/14/95
32 */
33
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(0, "$NetBSD: kern_ktrace.c,v 1.74.2.9 2004/10/17 07:44:37 skrll Exp $");
36
37 #include "opt_ktrace.h"
38 #include "opt_compat_mach.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/proc.h>
43 #include <sys/file.h>
44 #include <sys/namei.h>
45 #include <sys/vnode.h>
46 #include <sys/kernel.h>
47 #include <sys/kthread.h>
48 #include <sys/ktrace.h>
49 #include <sys/malloc.h>
50 #include <sys/syslog.h>
51 #include <sys/filedesc.h>
52 #include <sys/ioctl.h>
53 #include <sys/callout.h>
54
55 #include <sys/mount.h>
56 #include <sys/sa.h>
57 #include <sys/syscallargs.h>
58
59 #ifdef KTRACE
60
61 /*
62 * XXX:
63 * - need better error reporting?
64 * - p->p_tracep access lock. lock p_lock, lock ktd if !NULL, inc ref.
65 * - userland utility to sort ktrace.out by timestamp.
66 * - keep minimum information in ktrace_entry when rest of alloc failed.
67 * - enlarge ktrace_entry so that small entry won't require additional
68 * alloc?
69 * - per trace control of configurable parameters.
70 */
71
/*
 * One queued trace record: queue linkage, the record header (stored in
 * whichever of the two header layouts applies -- see ktrwrite()), and
 * an optional malloc'd payload.
 */
struct ktrace_entry {
	TAILQ_ENTRY(ktrace_entry) kte_list;	/* on ktd_queue of a ktr_desc */
	union {
		struct ktr_header un_kte_kth;	/* current header layout */
		struct ktr_compat un_kte_ktc;	/* old (version 0) layout */
	} kte_un;
#define kte_ktc kte_un.un_kte_ktc
#define kte_kth kte_un.un_kte_kth
	void *kte_buf;		/* payload following the header; NULL if none */
};
82
/*
 * Per-trace-target descriptor.  One of these exists for each open trace
 * output file; a dedicated kernel thread (ktrace_thread) drains
 * ktd_queue to ktd_fp.  Reference counted; protected by ktd_slock.
 */
struct ktr_desc {
	TAILQ_ENTRY(ktr_desc) ktd_list;		/* on global ktdq list */
	int ktd_flags;
#define KTDF_WAIT		0x0001	/* a tracer is waiting for a sync */
#define KTDF_DONE		0x0002	/* refcount hit zero; shutting down */
#define KTDF_BLOCKING		0x0004	/* writer thread appears stuck */
#define KTDF_INTERACTIVE	0x0008	/* output is a pipe (e.g. ktruss) */
	int ktd_error;
#define KTDE_ENOMEM		0x0001
#define KTDE_ENOSPC		0x0002
	int ktd_errcnt;			/* # of errors accumulated so far */
	int ktd_ref;			/* # of reference */
	int ktd_qcount;			/* # of entry in the queue */

	/*
	 * Params to control behaviour.
	 */
	int ktd_delayqcnt;		/* # of entry allowed to delay */
	int ktd_wakedelay;		/* delay of wakeup in *tick* */
	int ktd_intrwakdl;		/* ditto, but when interactive */

	struct file *ktd_fp;		/* trace output file */
	struct proc *ktd_proc;		/* our kernel thread */
	TAILQ_HEAD(, ktrace_entry) ktd_queue;
	struct callout ktd_wakch;	/* delayed wakeup */
	struct simplelock ktd_slock;
};
110
111 static void ktrinitheader(struct ktr_header *, struct lwp *, int);
112 static void ktrwrite(struct ktr_desc *, struct ktrace_entry *);
113 static int ktrace_common(struct proc *, int, int, int, struct file *);
114 static int ktrops(struct proc *, struct proc *, int, int,
115 struct ktr_desc *);
116 static int ktrsetchildren(struct proc *, struct proc *, int, int,
117 struct ktr_desc *);
118 static int ktrcanset(struct proc *, struct proc *);
119 static int ktrsamefile(struct file *, struct file *);
120
121 static struct ktr_desc *
122 ktd_lookup(struct file *);
123 static void ktdrel(struct ktr_desc *);
124 static void ktdref(struct ktr_desc *);
125 static void ktraddentry(struct lwp *, struct ktrace_entry *, int);
126 /* Flags for ktraddentry (3rd arg) */
127 #define KTA_NOWAIT 0x0000
128 #define KTA_WAITOK 0x0001
129 #define KTA_LARGE 0x0002
130 static void ktefree(struct ktrace_entry *);
131 static void ktd_logerrl(struct ktr_desc *, int);
132 static void ktd_logerr(struct proc *, int);
133 static void ktrace_thread(void *);
134
135 /*
136 * Default vaules.
137 */
138 #define KTD_MAXENTRY 1000 /* XXX: tune */
139 #define KTD_TIMEOUT 5 /* XXX: tune */
140 #define KTD_DELAYQCNT 100 /* XXX: tune */
141 #define KTD_WAKEDELAY 5000 /* XXX: tune */
142 #define KTD_INTRWAKDL 100 /* XXX: tune */
143
144 /*
145 * Patchable variables.
146 */
147 int ktd_maxentry = KTD_MAXENTRY; /* max # of entry in the queue */
148 int ktd_timeout = KTD_TIMEOUT; /* timeout in seconds */
149 int ktd_delayqcnt = KTD_DELAYQCNT; /* # of entry allowed to delay */
150 int ktd_wakedelay = KTD_WAKEDELAY; /* delay of wakeup in *ms* */
151 int ktd_intrwakdl = KTD_INTRWAKDL; /* ditto, but when interactive */
152
153 static struct simplelock ktdq_slock = SIMPLELOCK_INITIALIZER;
154 static TAILQ_HEAD(, ktr_desc) ktdq = TAILQ_HEAD_INITIALIZER(ktdq);
155
156 MALLOC_DEFINE(M_KTRACE, "ktrace", "ktrace data buffer");
157 POOL_INIT(kte_pool, sizeof(struct ktrace_entry), 0, 0, 0,
158 "ktepl", &pool_allocator_nointr);
159
/*
 * Wake the writer thread for this descriptor right away, cancelling any
 * delayed wakeup previously scheduled on ktd_wakch.
 */
static __inline void
ktd_wakeup(struct ktr_desc *ktd)
{

	callout_stop(&ktd->ktd_wakch);
	wakeup(ktd);
}
167
/*
 * Record an error condition on the descriptor.
 * Called with ktd_slock held (see ktd_logerr() and ktraddentry()).
 */
static void
ktd_logerrl(struct ktr_desc *ktd, int error)
{

	ktd->ktd_error |= error;
	ktd->ktd_errcnt++;
}
175
/*
 * Locked wrapper for ktd_logerrl(): record an error against the
 * descriptor p is currently being traced to, if any.
 */
static void
ktd_logerr(struct proc *p, int error)
{
	struct ktr_desc *ktd = p->p_tracep;

	if (ktd == NULL)
		return;

	simple_lock(&ktd->ktd_slock);
	ktd_logerrl(ktd, error);
	simple_unlock(&ktd->ktd_slock);
}
188
/*
 * Release a reference. Called with ktd_slock held.
 *
 * NOTE: unlike ktdref(), this DROPS ktd_slock before returning.  When
 * the last reference goes away, KTDF_DONE is set and the writer thread
 * is woken so it can drain and exit (see ktrace_thread()).
 */
void
ktdrel(struct ktr_desc *ktd)
{

	/* Both assertions check the same invariant; KDASSERT is the
	 * DIAGNOSTIC-only variant -- the KASSERT alone would suffice. */
	KDASSERT(ktd->ktd_ref != 0);
	KASSERT(ktd->ktd_ref > 0);
	if (--ktd->ktd_ref <= 0) {
		ktd->ktd_flags |= KTDF_DONE;
		wakeup(ktd);
	}
	simple_unlock(&ktd->ktd_slock);
}
204
205 void
206 ktdref(struct ktr_desc *ktd)
207 {
208
209 simple_lock(&ktd->ktd_slock);
210 ktd->ktd_ref++;
211 simple_unlock(&ktd->ktd_slock);
212 }
213
/*
 * Look up the descriptor already tracing to the same file as fp
 * (per ktrsamefile()).  On success the descriptor is returned with an
 * extra reference taken; returns NULL if no match.
 */
struct ktr_desc *
ktd_lookup(struct file *fp)
{
	struct ktr_desc *ktd;

	simple_lock(&ktdq_slock);
	for (ktd = TAILQ_FIRST(&ktdq); ktd != NULL;
	    ktd = TAILQ_NEXT(ktd, ktd_list)) {
		simple_lock(&ktd->ktd_slock);
		if (ktrsamefile(ktd->ktd_fp, fp)) {
			/* Bump the ref directly; we already hold ktd_slock. */
			ktd->ktd_ref++;
			simple_unlock(&ktd->ktd_slock);
			break;
		}
		simple_unlock(&ktd->ktd_slock);
	}
	simple_unlock(&ktdq_slock);
	return (ktd);
}
233
/*
 * Queue a finished trace entry onto the process's trace descriptor and
 * arrange for the writer thread to flush it.
 *
 * flags: KTA_WAITOK allows sleeping to throttle against the writer;
 * KTA_LARGE marks a big record (currently unused in the throttle test);
 * KTA_NOWAIT queues without any synchronization.
 *
 * On any failure path the entry is freed here -- the caller must not
 * touch kte after this call.
 */
void
ktraddentry(struct lwp *l, struct ktrace_entry *kte, int flags)
{
	struct proc *p = l->l_proc;
	struct ktr_desc *ktd;
#ifdef DEBUG
	struct timeval t;
	int s;
#endif

	if (p->p_traceflag & KTRFAC_TRC_EMUL) {
		/* Add emulation trace before first entry for this process */
		p->p_traceflag &= ~KTRFAC_TRC_EMUL;
		ktremul(l);
	}

	/*
	 * Tracing may be canceled while we were sleeping waiting for
	 * memory.
	 */
	ktd = p->p_tracep;
	if (ktd == NULL)
		goto freekte;

	/*
	 * Bump reference count so that the object will remain while
	 * we are here.  Note that the trace is controlled by other
	 * process.
	 */
	ktdref(ktd);

	simple_lock(&ktd->ktd_slock);
	if (ktd->ktd_flags & KTDF_DONE)
		goto relktd;

	if (ktd->ktd_qcount > ktd_maxentry) {
		/* Queue overflowed: drop the record and log ENOSPC. */
		ktd_logerrl(ktd, KTDE_ENOSPC);
		goto relktd;
	}
	TAILQ_INSERT_TAIL(&ktd->ktd_queue, kte, kte_list);
	ktd->ktd_qcount++;
	if (ktd->ktd_flags & KTDF_BLOCKING)
		/* Writer already known stuck: don't wait, just queue. */
		goto skip_sync;

	if (flags & KTA_WAITOK &&
	    (/* flags & KTA_LARGE */0 || ktd->ktd_flags & KTDF_WAIT ||
	    ktd->ktd_qcount > ktd_maxentry >> 1))
		/*
		 * Sync with writer thread since we're requesting rather
		 * big one or many requests are pending.
		 */
		do {
			ktd->ktd_flags |= KTDF_WAIT;
			ktd_wakeup(ktd);
#ifdef DEBUG
			s = splclock();
			t = mono_time;
			splx(s);
#endif
			/* Sleep until the writer clears KTDF_WAIT, or
			 * give up after ktd_timeout seconds. */
			if (ltsleep(&ktd->ktd_flags, PWAIT, "ktrsync",
			    ktd_timeout * hz, &ktd->ktd_slock) != 0) {
				ktd->ktd_flags |= KTDF_BLOCKING;
				/*
				 * Maybe the writer thread is blocking
				 * completely for some reason, but
				 * don't stop target process forever.
				 */
				log(LOG_NOTICE, "ktrace timeout\n");
				break;
			}
#ifdef DEBUG
			s = splclock();
			timersub(&mono_time, &t, &t);
			splx(s);
			if (t.tv_sec > 0)
				log(LOG_NOTICE,
				    "ktrace long wait: %ld.%06ld\n",
				    t.tv_sec, t.tv_usec);
#endif
		} while (p->p_tracep == ktd &&
		    (ktd->ktd_flags & (KTDF_WAIT | KTDF_DONE)) == KTDF_WAIT);
	else {
		/* Schedule delayed wakeup */
		if (ktd->ktd_qcount > ktd->ktd_delayqcnt)
			ktd_wakeup(ktd);	/* Wakeup now */
		else if (!callout_pending(&ktd->ktd_wakch))
			callout_reset(&ktd->ktd_wakch,
			    ktd->ktd_flags & KTDF_INTERACTIVE ?
			    ktd->ktd_intrwakdl : ktd->ktd_wakedelay,
			    (void (*)(void *))wakeup, ktd);
	}

skip_sync:
	ktdrel(ktd);		/* NB: drops ktd_slock */
	return;

relktd:
	ktdrel(ktd);		/* NB: drops ktd_slock */

freekte:
	ktefree(kte);
}
336
/*
 * Free a trace entry and its payload (if any).
 * The NULL check is required: unlike ISO C free(), the kernel
 * allocator is not given NULL here.
 */
void
ktefree(struct ktrace_entry *kte)
{

	if (kte->kte_buf != NULL)
		free(kte->kte_buf, M_KTRACE);
	pool_put(&kte_pool, kte);
}
345
346 /*
347 * "deep" compare of two files for the purposes of clearing a trace.
348 * Returns true if they're the same open file, or if they point at the
349 * same underlying vnode/socket.
350 */
351
352 int
353 ktrsamefile(struct file *f1, struct file *f2)
354 {
355
356 return ((f1 == f2) ||
357 ((f1 != NULL) && (f2 != NULL) &&
358 (f1->f_type == f2->f_type) &&
359 (f1->f_data == f2->f_data)));
360 }
361
/*
 * Detach process p from its trace descriptor: clear all trace flags,
 * break the p_tracep link, wake any tracer sleeping on ktd_flags
 * (see the ltsleep in ktraddentry()), and drop p's reference.
 * NB: ktdrel() releases ktd_slock for us.
 */
void
ktrderef(struct proc *p)
{
	struct ktr_desc *ktd = p->p_tracep;

	p->p_traceflag = 0;
	if (ktd == NULL)
		return;
	p->p_tracep = NULL;

	simple_lock(&ktd->ktd_slock);
	wakeup(&ktd->ktd_flags);
	ktdrel(ktd);
}
376
/*
 * Take a reference on behalf of process p.
 * Caller must guarantee p->p_tracep is non-NULL (ktdref() dereferences
 * it unconditionally).
 */
void
ktradref(struct proc *p)
{
	struct ktr_desc *ktd = p->p_tracep;

	ktdref(ktd);
}
384
/*
 * Initialize the common fields of a trace record header: record type,
 * timestamp, pid and command name, plus the trace format version taken
 * from the process's trace flags.  Version 1 headers additionally carry
 * the lwp id.
 */
void
ktrinitheader(struct ktr_header *kth, struct lwp *l, int type)
{
	struct proc *p = l->l_proc;
	struct timeval tv;

	(void)memset(kth, 0, sizeof(*kth));
	kth->ktr_type = type;
	microtime(&tv);
	TIMEVAL_TO_TIMESPEC(&tv, &kth->ktr_time);
	kth->ktr_pid = p->p_pid;
	memcpy(kth->ktr_comm, p->p_comm, MAXCOMLEN);

	/* Encode the trace format version in the high bits of the type. */
	kth->ktr_type |= KTRFAC_VERSION(p->p_traceflag) << KTR_VER_SHIFT;

	switch (KTRFAC_VERSION(p->p_traceflag)) {
	case 0:
		/* This is the original format */
		break;
	case 1:
		kth->ktr_lid = l->l_lid;
		break;
	default:
		break;
	}
}
411
/*
 * Emit a KTR_SYSCALL record: the (possibly translated) syscall code and
 * a copy of its in-kernel argument words.
 *
 * code indexes the sysent table for the argument size; realcode is the
 * value actually written into the record (they differ for emulated
 * syscalls).  KTRFAC_ACTIVE is set for the duration to suppress
 * recursive tracing.
 */
void
ktrsyscall(struct lwp *l, register_t code, register_t realcode,
    const struct sysent *callp, register_t args[])
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;
	struct ktr_syscall *ktp;
	register_t *argp;
	int argsize;
	size_t len;
	u_int i;

	if (callp == NULL)
		callp = p->p_emul->e_sysent;

	argsize = callp[code].sy_argsize;
#ifdef _LP64
	/* 32-bit process on a 64-bit kernel: args are stored in
	 * 64-bit register_t slots, so double the recorded size. */
	if (p->p_flag & P_32)
		argsize = argsize << 1;
#endif
	len = sizeof(struct ktr_syscall) + argsize;

	p->p_traceflag |= KTRFAC_ACTIVE;
	kte = pool_get(&kte_pool, PR_WAITOK);
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, KTR_SYSCALL);

	ktp = malloc(len, M_KTRACE, M_WAITOK);
	ktp->ktr_code = realcode;
	ktp->ktr_argsize = argsize;
	/* Argument words follow the ktr_syscall structure directly. */
	argp = (register_t *)(ktp + 1);
	for (i = 0; i < (argsize / sizeof(*argp)); i++)
		*argp++ = args[i];
	kth->ktr_len = len;
	kte->kte_buf = ktp;

	ktraddentry(l, kte, KTA_WAITOK);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}
452
/*
 * Emit a KTR_SYSRET record: syscall code, error, and up to two return
 * value words.  retval may be NULL, in which case zeros are recorded.
 */
void
ktrsysret(struct lwp *l, register_t code, int error, register_t *retval)
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;
	struct ktr_sysret *ktp;

	p->p_traceflag |= KTRFAC_ACTIVE;
	kte = pool_get(&kte_pool, PR_WAITOK);
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, KTR_SYSRET);

	ktp = malloc(sizeof(struct ktr_sysret), M_KTRACE, M_WAITOK);
	ktp->ktr_code = code;
	ktp->ktr_eosys = 0;			/* XXX unused */
	ktp->ktr_error = error;
	ktp->ktr_retval = retval ? retval[0] : 0;
	ktp->ktr_retval_1 = retval ? retval[1] : 0;

	kth->ktr_len = sizeof(struct ktr_sysret);
	kte->kte_buf = ktp;

	ktraddentry(l, kte, KTA_WAITOK);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}
479
480 /*
481 * XXX: ndp->ni_pathlen should be passed.
482 */
483 void
484 ktrnamei(struct lwp *l, char *path)
485 {
486
487 ktrkmem(l, KTR_NAMEI, path, strlen(path));
488 }
489
490 void
491 ktremul(struct lwp *l)
492 {
493 const char *emul = l->l_proc->p_emul->e_name;
494
495 ktrkmem(l, KTR_EMUL, emul, strlen(emul));
496 }
497
/*
 * Generic helper: emit a record of the given type whose payload is a
 * copy of len bytes of kernel memory at buf.
 */
void
ktrkmem(struct lwp *l, int type, const void *buf, size_t len)
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;

	p->p_traceflag |= KTRFAC_ACTIVE;
	kte = pool_get(&kte_pool, PR_WAITOK);
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, type);

	kth->ktr_len = len;
	kte->kte_buf = malloc(len, M_KTRACE, M_WAITOK);
	memcpy(kte->kte_buf, buf, len);

	ktraddentry(l, kte, KTA_WAITOK);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}
517
/*
 * Emit KTR_GENIO record(s) for a completed read/write of len bytes
 * described by the user-space iovec array.  Data is copied in from
 * userland and split into PAGE_SIZE-bounded records so that one huge
 * I/O does not exhaust the kmem map.  Nothing is recorded on error.
 *
 * NOTE: iov is consumed destructively (iov_len/iov_base are advanced).
 */
void
ktrgenio(struct lwp *l, int fd, enum uio_rw rw, struct iovec *iov,
    int len, int error)
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;
	struct ktr_genio *ktp;
	int resid = len, cnt;
	caddr_t cp;
	int buflen;

	if (error)
		return;

	p->p_traceflag |= KTRFAC_ACTIVE;

next:
	/* One record = ktr_genio header + at most a page worth of data. */
	buflen = min(PAGE_SIZE, resid + sizeof(struct ktr_genio));

	kte = pool_get(&kte_pool, PR_WAITOK);
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, KTR_GENIO);

	ktp = malloc(buflen, M_KTRACE, M_WAITOK);
	ktp->ktr_fd = fd;
	ktp->ktr_rw = rw;

	kte->kte_buf = ktp;

	/* Data area starts right after the ktr_genio structure. */
	cp = (caddr_t)(ktp + 1);
	buflen -= sizeof(struct ktr_genio);
	kth->ktr_len = sizeof(struct ktr_genio);

	while (buflen > 0) {
		cnt = min(iov->iov_len, buflen);
		/* copyin failure: free the partial entry and give up. */
		if (copyin(iov->iov_base, cp, cnt) != 0)
			goto out;
		kth->ktr_len += cnt;
		buflen -= cnt;
		resid -= cnt;
		iov->iov_len -= cnt;
		if (iov->iov_len == 0)
			iov++;
		else
			iov->iov_base = (caddr_t)iov->iov_base + cnt;
	}

	/*
	 * Don't push so many entry at once.  It will cause kmem map
	 * shortage.
	 */
	ktraddentry(l, kte, KTA_WAITOK | KTA_LARGE);
	if (resid > 0) {
#if 0 /* XXX NJWLWP */
		KDASSERT(p->p_cpu != NULL);
		KDASSERT(p->p_cpu == curcpu());
#endif
		/* XXX NJWLWP */
		/* Yield between chunks if preemption was requested. */
		if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
			preempt(1);

		goto next;
	}

	p->p_traceflag &= ~KTRFAC_ACTIVE;
	return;

out:
	ktefree(kte);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}
590
591 void
592 ktrpsig(struct lwp *l, int sig, sig_t action, const sigset_t *mask,
593 const ksiginfo_t *ksi)
594 {
595 struct proc *p = l->l_proc;
596 struct ktrace_entry *kte;
597 struct ktr_header *kth;
598 struct {
599 struct ktr_psig kp;
600 siginfo_t si;
601 } *kbuf;
602
603 p->p_traceflag |= KTRFAC_ACTIVE;
604 kte = pool_get(&kte_pool, PR_WAITOK);
605 kth = &kte->kte_kth;
606 ktrinitheader(kth, l, KTR_PSIG);
607
608 kbuf = malloc(sizeof(*kbuf), M_KTRACE, M_WAITOK);
609 kbuf->kp.signo = (char)sig;
610 kbuf->kp.action = action;
611 kbuf->kp.mask = *mask;
612 kte->kte_buf = kbuf;
613 if (ksi) {
614 kbuf->kp.code = KSI_TRAPCODE(ksi);
615 (void)memset(&kbuf->si, 0, sizeof(kbuf->si));
616 kbuf->si._info = ksi->ksi_info;
617 kth->ktr_len = sizeof(kbuf);
618 } else {
619 kbuf->kp.code = 0;
620 kth->ktr_len = sizeof(struct ktr_psig);
621 }
622
623 ktraddentry(l, kte, KTA_WAITOK);
624 p->p_traceflag &= ~KTRFAC_ACTIVE;
625 }
626
/*
 * Emit a KTR_CSW (context switch) record.  out is true when the lwp is
 * about to sleep, in which case all allocations must be non-blocking:
 * sleeping here could re-satisfy the original sleep condition and hang.
 * Allocation failures are counted via ktd_logerr() and the record is
 * dropped.
 */
void
ktrcsw(struct lwp *l, int out, int user)
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;
	struct ktr_csw *kc;

	p->p_traceflag |= KTRFAC_ACTIVE;

	/*
	 * We can't sleep if we're already going to sleep (if original
	 * condition is met during sleep, we hang up).
	 */
	kte = pool_get(&kte_pool, out ? PR_NOWAIT : PR_WAITOK);
	if (kte == NULL) {
		ktd_logerr(p, KTDE_ENOMEM);
		goto out;
	}
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, KTR_CSW);

	kc = malloc(sizeof(struct ktr_csw), M_KTRACE,
	    out ? M_NOWAIT : M_WAITOK);
	if (kc == NULL) {
		ktd_logerr(p, KTDE_ENOMEM);
		goto free_kte;
	}
	kc->out = out;
	kc->user = user;
	kth->ktr_len = sizeof(struct ktr_csw);
	kte->kte_buf = kc;

	ktraddentry(l, kte, out ? KTA_NOWAIT : KTA_WAITOK);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
	return;

free_kte:
	pool_put(&kte_pool, kte);
out:
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}
669
/*
 * Emit a KTR_USER record: an application-supplied label plus len bytes
 * copied in from the user address addr.  If ustr is set, id is a user
 * pointer and is copied in as a string; otherwise it is a kernel
 * string.  A failed copyin of the data shrinks the record to just the
 * label rather than failing.
 */
void
ktruser(struct lwp *l, const char *id, void *addr, size_t len, int ustr)
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;
	struct ktr_user *ktp;
	caddr_t user_dta;

	p->p_traceflag |= KTRFAC_ACTIVE;
	kte = pool_get(&kte_pool, PR_WAITOK);
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, KTR_USER);

	ktp = malloc(sizeof(struct ktr_user) + len, M_KTRACE, M_WAITOK);
	if (ustr) {
		if (copyinstr(id, ktp->ktr_id, KTR_USER_MAXIDLEN, NULL) != 0)
			ktp->ktr_id[0] = '\0';
	} else
		strncpy(ktp->ktr_id, id, KTR_USER_MAXIDLEN);
	/* Force NUL termination whichever path filled the label. */
	ktp->ktr_id[KTR_USER_MAXIDLEN-1] = '\0';

	user_dta = (caddr_t)(ktp + 1);
	if (copyin(addr, (void *)user_dta, len) != 0)
		len = 0;

	kth->ktr_len = sizeof(struct ktr_user) + len;
	kte->kte_buf = ktp;

	ktraddentry(l, kte, KTA_WAITOK);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}
702
/*
 * Emit a KTR_MMSG record containing a raw copy of a (Mach) message.
 */
void
ktrmmsg(struct lwp *l, const void *msgh, size_t size)
{
	ktrkmem(l, KTR_MMSG, msgh, size);
}
708
/*
 * Emit a KTR_MOOL record: size bytes of kernel memory at kaddr,
 * prefixed by a ktr_mool header recording the corresponding user
 * address and length.
 */
void
ktrmool(struct lwp *l, const void *kaddr, size_t size, const void *uaddr)
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;
	struct ktr_mool *kp;
	struct ktr_mool *buf;

	p->p_traceflag |= KTRFAC_ACTIVE;
	kte = pool_get(&kte_pool, PR_WAITOK);
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, KTR_MOOL);

	kp = malloc(size + sizeof(*kp), M_KTRACE, M_WAITOK);
	kp->uaddr = uaddr;
	kp->size = size;
	buf = kp + 1;		/* Skip uaddr and size */
	(void)memcpy(buf, kaddr, size);

	kth->ktr_len = size + sizeof(*kp);
	kte->kte_buf = kp;

	ktraddentry(l, kte, KTA_WAITOK);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}
735
/*
 * Emit a KTR_SAUPCALL record describing a scheduler-activations
 * upcall: type, event/interrupt counts, the sa/ap pointers, followed by
 * copies of the nevent+nint+1 sa_t structures fetched from userland.
 * sa_t's whose copyin fails are silently omitted from the record.
 */
void
ktrsaupcall(struct lwp *l, int type, int nevent, int nint, void *sas,
    void *ap)
{
	struct proc *p = l->l_proc;
	struct ktrace_entry *kte;
	struct ktr_header *kth;
	struct ktr_saupcall *ktp;
	size_t len;
	struct sa_t **sapp;
	int i;

	p->p_traceflag |= KTRFAC_ACTIVE;
	kte = pool_get(&kte_pool, PR_WAITOK);
	kth = &kte->kte_kth;
	ktrinitheader(kth, l, KTR_SAUPCALL);

	len = sizeof(struct ktr_saupcall);
	ktp = malloc(len + sizeof(struct sa_t) * (nevent + nint + 1), M_KTRACE,
	    M_WAITOK);

	ktp->ktr_type = type;
	ktp->ktr_nevent = nevent;
	ktp->ktr_nint = nint;
	ktp->ktr_sas = sas;
	ktp->ktr_ap = ap;
	/*
	 * Copy the sa_t's
	 */
	sapp = (struct sa_t **) sas;

	for (i = nevent + nint; i >= 0; i--) {
		/* len grows only for sa_t's successfully copied in. */
		if (copyin(*sapp, (char *)ktp + len, sizeof(struct sa_t)) == 0)
			len += sizeof(struct sa_t);
		sapp++;
	}

	kth->ktr_len = len;
	kte->kte_buf = ktp;

	ktraddentry(l, kte, KTA_WAITOK);
	p->p_traceflag &= ~KTRFAC_ACTIVE;
}
779
780 /* Interface and common routines */
781
782 int
783 ktrace_common(struct proc *curp, int ops, int facs, int pid, struct file *fp)
784 {
785 struct proc *p;
786 struct pgrp *pg;
787 struct ktr_desc *ktd = NULL;
788 int ret = 0;
789 int error = 0;
790 int descend;
791
792 curp->p_traceflag |= KTRFAC_ACTIVE;
793 descend = ops & KTRFLAG_DESCEND;
794 facs = facs & ~((unsigned) KTRFAC_ROOT);
795
796 switch (KTROP(ops)) {
797
798 case KTROP_CLEARFILE:
799 /*
800 * Clear all uses of the tracefile
801 */
802
803 ktd = ktd_lookup(fp);
804 if (ktd == NULL)
805 goto done;
806
807 proclist_lock_read();
808 LIST_FOREACH(p, &allproc, p_list) {
809 if (p->p_tracep == ktd) {
810 if (ktrcanset(curp, p))
811 ktrderef(p);
812 else
813 error = EPERM;
814 }
815 }
816 proclist_unlock_read();
817 goto done;
818
819 case KTROP_SET:
820 ktd = ktd_lookup(fp);
821 if (ktd == NULL) {
822 ktd = malloc(sizeof(struct ktr_desc),
823 M_KTRACE, M_WAITOK);
824 TAILQ_INIT(&ktd->ktd_queue);
825 simple_lock_init(&ktd->ktd_slock);
826 callout_init(&ktd->ktd_wakch);
827 ktd->ktd_flags = ktd->ktd_qcount =
828 ktd->ktd_error = ktd->ktd_errcnt = 0;
829 ktd->ktd_ref = 1;
830 ktd->ktd_delayqcnt = ktd_delayqcnt;
831 ktd->ktd_wakedelay = mstohz(ktd_wakedelay);
832 ktd->ktd_intrwakdl = mstohz(ktd_intrwakdl);
833 /*
834 * XXX: not correct. needs an way to detect
835 * whether ktruss or ktrace.
836 */
837 if (fp->f_type == DTYPE_PIPE)
838 ktd->ktd_flags |= KTDF_INTERACTIVE;
839
840 error = kthread_create1(ktrace_thread, ktd,
841 &ktd->ktd_proc, "ktr %p", ktd);
842 if (error != 0) {
843 free(ktd, M_KTRACE);
844 goto done;
845 }
846
847 simple_lock(&fp->f_slock);
848 fp->f_count++;
849 simple_unlock(&fp->f_slock);
850 ktd->ktd_fp = fp;
851
852 simple_lock(&ktdq_slock);
853 TAILQ_INSERT_TAIL(&ktdq, ktd, ktd_list);
854 simple_unlock(&ktdq_slock);
855 }
856 break;
857
858 case KTROP_CLEAR:
859 break;
860 }
861
862 /*
863 * need something to (un)trace (XXX - why is this here?)
864 */
865 if (!facs) {
866 error = EINVAL;
867 goto done;
868 }
869
870 /*
871 * do it
872 */
873 if (pid < 0) {
874 /*
875 * by process group
876 */
877 pg = pg_find(-pid, PFIND_UNLOCK_FAIL);
878 if (pg == NULL) {
879 error = ESRCH;
880 goto done;
881 }
882 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
883 if (descend)
884 ret |= ktrsetchildren(curp, p, ops, facs, ktd);
885 else
886 ret |= ktrops(curp, p, ops, facs, ktd);
887 }
888
889 } else {
890 /*
891 * by pid
892 */
893 p = p_find(pid, PFIND_UNLOCK_FAIL);
894 if (p == NULL) {
895 error = ESRCH;
896 goto done;
897 }
898 if (descend)
899 ret |= ktrsetchildren(curp, p, ops, facs, ktd);
900 else
901 ret |= ktrops(curp, p, ops, facs, ktd);
902 }
903 proclist_unlock_read(); /* taken by p{g}_find */
904 if (!ret)
905 error = EPERM;
906 done:
907 if (error != 0 && ktd != NULL)
908 /*
909 * Wakup the thread so that it can be die if we
910 * can't trace any process.
911 */
912 ktd_wakeup(ktd);
913 if (KTROP(ops) == KTROP_SET ||
914 KTROP(ops) == KTROP_CLEARFILE) {
915 simple_lock(&ktd->ktd_slock);
916 ktdrel(ktd);
917 }
918 curp->p_traceflag &= ~KTRFAC_ACTIVE;
919 return (error);
920 }
921
922 /*
923 * fktrace system call
924 */
925 /* ARGSUSED */
926 int
927 sys_fktrace(struct lwp *l, void *v, register_t *retval)
928 {
929 struct sys_fktrace_args /* {
930 syscallarg(int) fd;
931 syscallarg(int) ops;
932 syscallarg(int) facs;
933 syscallarg(int) pid;
934 } */ *uap = v;
935 struct proc *curp;
936 struct file *fp = NULL;
937 struct filedesc *fdp = l->l_proc->p_fd;
938 int error;
939
940 curp = l->l_proc;
941 fdp = curp->p_fd;
942 if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
943 return (EBADF);
944
945 FILE_USE(fp);
946
947 if ((fp->f_flag & FWRITE) == 0)
948 error = EBADF;
949 else
950 error = ktrace_common(curp, SCARG(uap, ops),
951 SCARG(uap, facs), SCARG(uap, pid), fp);
952
953 FILE_UNUSE(fp, l);
954
955 return error;
956 }
957
/*
 * ktrace system call
 *
 * Opens (or, for KTROP_CLEAR, skips opening) the named trace file,
 * wraps its vnode in a temporary struct file, and hands off to
 * ktrace_common().  The temporary fd slot is released before return.
 */
/* ARGSUSED */
int
sys_ktrace(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ktrace_args /* {
		syscallarg(const char *) fname;
		syscallarg(int) ops;
		syscallarg(int) facs;
		syscallarg(int) pid;
	} */ *uap = v;
	struct proc *curp = l->l_proc;
	struct vnode *vp = NULL;
	struct file *fp = NULL;
	int ops = SCARG(uap, ops);
	struct nameidata nd;
	int error = 0;
	int fd;

	/* Mask ops down to the operation code plus the descend flag. */
	ops = KTROP(ops) | (ops & KTRFLAG_DESCEND);

	curp->p_traceflag |= KTRFAC_ACTIVE;
	if ((ops & KTROP_CLEAR) == 0) {
		/*
		 * an operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, fname),
		    l);
		if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
			curp->p_traceflag &= ~KTRFAC_ACTIVE;
			return (error);
		}
		vp = nd.ni_vp;
		VOP_UNLOCK(vp, 0);
		/* Only regular files may be trace targets. */
		if (vp->v_type != VREG) {
			(void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, l);
			curp->p_traceflag &= ~KTRFAC_ACTIVE;
			return (EACCES);
		}
		/*
		 * XXX This uses up a file descriptor slot in the
		 * tracing process for the duration of this syscall.
		 * This is not expected to be a problem.  If
		 * falloc(NULL, ...) DTRT we could skip that part, but
		 * that would require changing its interface to allow
		 * the caller to pass in a ucred..
		 *
		 * This will FILE_USE the fp it returns, if any.
		 * Keep it in use until we return.
		 */
		if ((error = falloc(curp, &fp, &fd)) != 0)
			goto done;

		fp->f_flag = FWRITE;
		fp->f_type = DTYPE_VNODE;
		fp->f_ops = &vnops;
		fp->f_data = (caddr_t)vp;
		FILE_SET_MATURE(fp);
		/* Ownership of the vnode moved into fp: don't close it
		 * separately at "done". */
		vp = NULL;
	}
	error = ktrace_common(curp, SCARG(uap, ops), SCARG(uap, facs),
	    SCARG(uap, pid), fp);
done:
	if (vp != NULL)
		(void) vn_close(vp, FWRITE, curp->p_ucred, l);
	if (fp != NULL) {
		FILE_UNUSE(fp, l);	/* release file */
		fdrelease(l, fd);	/* release fd table slot */
	}
	return (error);
}
1031
/*
 * Apply a set/clear operation to a single process.  Returns 1 on
 * success, 0 if the caller lacks permission (ktrcanset()).
 * On every change/attach a fresh KTR_EMUL record is scheduled via
 * KTRFAC_TRC_EMUL (emitted lazily by ktraddentry()).
 */
int
ktrops(struct proc *curp, struct proc *p, int ops, int facs,
    struct ktr_desc *ktd)
{

	if (!ktrcanset(curp, p))
		return (0);
	if (KTROP(ops) == KTROP_SET) {
		if (p->p_tracep != ktd) {
			/*
			 * if trace file already in use, relinquish
			 */
			ktrderef(p);
			p->p_tracep = ktd;
			ktradref(p);
		}
		p->p_traceflag |= facs;
		/* Mark root-initiated traces so only root can alter them. */
		if (curp->p_ucred->cr_uid == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
			/* no more tracing */
			ktrderef(p);
		}
	}

	/*
	 * Emit an emulation record, every time there is a ktrace
	 * change/attach request.
	 */
	if (KTRPOINT(p, KTR_EMUL))
		p->p_traceflag |= KTRFAC_TRC_EMUL;
#ifdef __HAVE_SYSCALL_INTERN
	(*p->p_emul->e_syscall_intern)(p);
#endif

	return (1);
}
1071
/*
 * Apply ktrops() to top and all of its descendants via an iterative
 * pre-order walk of the process tree (no recursion, bounded by the
 * tree depth through p_pptr back-links).  Returns the OR of all
 * per-process ktrops() results.
 */
int
ktrsetchildren(struct proc *curp, struct proc *top, int ops, int facs,
    struct ktr_desc *ktd)
{
	struct proc *p;
	int ret = 0;

	p = top;
	for (;;) {
		ret |= ktrops(curp, p, ops, facs, ktd);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (LIST_FIRST(&p->p_children) != NULL) {
			p = LIST_FIRST(&p->p_children);
			continue;
		}
		for (;;) {
			if (p == top)
				return (ret);
			if (LIST_NEXT(p, p_sibling) != NULL) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
	}
	/*NOTREACHED*/
}
1103
/*
 * Flush a chain of trace entries (kte and everything after it on the
 * list) to the descriptor's output file.  Entries are gathered into an
 * iovec batch (up to 64 iovecs, header + payload per entry), with
 * old-format headers converted in place.  On a write error other than
 * EWOULDBLOCK, tracing to this descriptor is torn down for every
 * process using it.  All entries are freed before returning.
 */
void
ktrwrite(struct ktr_desc *ktd, struct ktrace_entry *kte)
{
	struct uio auio;
	struct iovec aiov[64], *iov;
	struct ktrace_entry *top = kte;
	struct ktr_header *kth;
	struct ktr_compat *ktc;
	struct file *fp = ktd->ktd_fp;
	struct proc *p;
	int rl, hl;
	int error;
next:
	/* Start a fresh iovec batch. */
	auio.uio_iov = iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_resid = 0;
	auio.uio_iovcnt = 0;
	auio.uio_lwp = NULL;
	do {
		kth = &kte->kte_kth;

		rl = kth->ktr_len;	/* payload length */
		hl = KTRv0_LEN;		/* header length; fixed up below */

		switch (KTR_VERSION(kth)) {
		case 0:
			/*
			 * Convert the old format fields to the new
			 */
			ktc = &kte->kte_ktc;
			TIMESPEC_TO_TIMEVAL(&ktc->ktc_time, &kth->ktr_time);
			ktc->ktc_unused = NULL;
			hl = KTRv0_LEN;
			break;
		/*
		 * Add in the incremental header size for later versions of
		 * header records so that old kdump(1) binaries get the right
		 * total record lenth.
		 */
		case 1:
			kth->ktr_len += KTRv1_LEN - KTRv0_LEN;

			hl = KTRv1_LEN;
			break;
		}
		/* Header iovec, then (optionally) the payload iovec. */
		iov->iov_base = (caddr_t)kth;
		iov++->iov_len = hl;
		auio.uio_resid += hl;
		auio.uio_iovcnt++;
		if (rl > 0) {
			iov->iov_base = kte->kte_buf;
			iov++->iov_len = rl;
			auio.uio_resid += rl;
			auio.uio_iovcnt++;
		}
	} while ((kte = TAILQ_NEXT(kte, kte_list)) != NULL &&
	    auio.uio_iovcnt < sizeof(aiov) / sizeof(aiov[0]) - 1);

again:
	simple_lock(&fp->f_slock);
	FILE_USE(fp);
	error = (*fp->f_ops->fo_write)(fp, &fp->f_offset, &auio,
	    fp->f_cred, FOF_UPDATE_OFFSET);
	FILE_UNUSE(fp, NULL);
	switch (error) {

	case 0:
		/* Short write: retry with the remaining resid. */
		if (auio.uio_resid > 0)
			goto again;
		/* More entries than fit in one batch: next batch. */
		if (kte != NULL)
			goto next;
		break;

	case EWOULDBLOCK:
		preempt(1);
		goto again;

	default:
		/*
		 * If error encountered, give up tracing on this
		 * vnode.  Don't report EPIPE as this can easily
		 * happen with fktrace()/ktruss.
		 */
#ifndef DEBUG
		if (error != EPIPE)
#endif
			log(LOG_NOTICE,
			    "ktrace write failed, errno %d, tracing stopped\n",
			    error);
		proclist_lock_read();
		LIST_FOREACH(p, &allproc, p_list) {
			if (p->p_tracep == ktd)
				ktrderef(p);
		}
		proclist_unlock_read();
	}

	/* Free the whole chain we were handed, written or not. */
	while ((kte = top) != NULL) {
		top = TAILQ_NEXT(top, kte_list);
		ktefree(kte);
	}
}
1208
/*
 * Per-descriptor writer thread (created in ktrace_common()).
 * Repeatedly grabs the whole queue under ktd_slock, writes it out via
 * ktrwrite(), and sleeps when the queue is empty.  Exits when the queue
 * is empty and the reference count has dropped to zero, tearing down
 * the descriptor and closing the output file.
 */
void
ktrace_thread(void *arg)
{
	struct ktr_desc *ktd = arg;
	struct file *fp = ktd->ktd_fp;
	struct ktrace_entry *kte;
	int ktrerr, errcnt;

	for (;;) {
		simple_lock(&ktd->ktd_slock);
		kte = TAILQ_FIRST(&ktd->ktd_queue);
		if (kte == NULL) {
			/* Queue drained: release any tracer waiting in
			 * ktraddentry()'s sync loop. */
			if (ktd->ktd_flags & KTDF_WAIT) {
				ktd->ktd_flags &= ~(KTDF_WAIT | KTDF_BLOCKING);
				wakeup(&ktd->ktd_flags);
			}
			if (ktd->ktd_ref == 0)
				break;
			/* PNORELOCK: ktd_slock is released by ltsleep. */
			ltsleep(ktd, PWAIT | PNORELOCK, "ktrwait", 0,
			    &ktd->ktd_slock);
			continue;
		}
		/* Steal the entire queue and its error stats, then
		 * write without the lock held. */
		TAILQ_INIT(&ktd->ktd_queue);
		ktd->ktd_qcount = 0;
		ktrerr = ktd->ktd_error;
		errcnt = ktd->ktd_errcnt;
		ktd->ktd_error = ktd->ktd_errcnt = 0;
		simple_unlock(&ktd->ktd_slock);

		if (ktrerr) {
			log(LOG_NOTICE,
			    "ktrace failed, fp %p, error 0x%x, total %d\n",
			    fp, ktrerr, errcnt);
		}
		ktrwrite(ktd, kte);
	}
	simple_unlock(&ktd->ktd_slock);

	simple_lock(&ktdq_slock);
	TAILQ_REMOVE(&ktdq, ktd, ktd_list);
	simple_unlock(&ktdq_slock);

	simple_lock(&fp->f_slock);
	FILE_USE(fp);

	/*
	 * ktrace file descriptor can't be watched (are not visible to
	 * userspace), so no kqueue stuff here
	 * XXX: The above comment is wrong, because the fktrace file
	 * descriptor is available in userland.
	 */
	closef(fp, NULL);

	callout_stop(&ktd->ktd_wakch);
	free(ktd, M_KTRACE);

	kthread_exit(0);
}
1267
1268 /*
1269 * Return true if caller has permission to set the ktracing state
1270 * of target. Essentially, the target can't possess any
1271 * more permissions than the caller. KTRFAC_ROOT signifies that
1272 * root previously set the tracing status on the target process, and
1273 * so, only root may further change it.
1274 *
1275 * TODO: check groups. use caller effective gid.
1276 */
1277 int
1278 ktrcanset(struct proc *callp, struct proc *targetp)
1279 {
1280 struct pcred *caller = callp->p_cred;
1281 struct pcred *target = targetp->p_cred;
1282
1283 if ((caller->pc_ucred->cr_uid == target->p_ruid &&
1284 target->p_ruid == target->p_svuid &&
1285 caller->p_rgid == target->p_rgid && /* XXX */
1286 target->p_rgid == target->p_svgid &&
1287 (targetp->p_traceflag & KTRFAC_ROOT) == 0 &&
1288 (targetp->p_flag & P_SUGID) == 0) ||
1289 caller->pc_ucred->cr_uid == 0)
1290 return (1);
1291
1292 return (0);
1293 }
1294 #endif /* KTRACE */
1295
/*
 * Put user defined entry to ktrace records.
 *
 * Compiled even without KTRACE (note this function sits outside the
 * big #ifdef); in that configuration it simply returns ENOSYS.
 */
int
sys_utrace(struct lwp *l, void *v, register_t *retval)
{
#ifdef KTRACE
	struct sys_utrace_args /* {
		syscallarg(const char *) label;
		syscallarg(void *) addr;
		syscallarg(size_t) len;
	} */ *uap = v;
	struct proc *p = l->l_proc;

	/* Silently succeed when KTR_USER tracing is not enabled. */
	if (!KTRPOINT(p, KTR_USER))
		return (0);

	if (SCARG(uap, len) > KTR_USER_MAXLEN)
		return (EINVAL);

	/* Last arg 1: label is a userland string, copyinstr'd. */
	ktruser(l, SCARG(uap, label), SCARG(uap, addr), SCARG(uap, len), 1);

	return (0);
#else /* !KTRACE */
	return ENOSYS;
#endif /* KTRACE */
}
1323