/*	$NetBSD: tprof.c,v 1.1 2008/01/01 21:28:37 yamt Exp $	*/

/*-
 * Copyright (c)2008 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.1 2008/01/01 21:28:37 yamt Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/callout.h>
#include <sys/kmem.h>
#include <sys/workqueue.h>
#include <sys/queue.h>

#include <dev/tprof/tprof.h>
#include <dev/tprof/tprof_ioctl.h>

#include <machine/db_machdep.h>	/* PC_REGS */

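/*
 * Overview:
 *
 * The profiling backend calls tprof_sample() from its sampling interrupt
 * (possibly an NMI) to record the interrupted program counter into a
 * per-CPU buffer.  A per-CPU worker, driven by a callout roughly once per
 * second, swaps in a fresh buffer and appends the filled one to a global
 * list, from which read(2) on the tprof device copies the samples out to
 * userland.
 */
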
typedef struct {
	uintptr_t s_pc;		/* program counter */
} tprof_sample_t;

typedef struct tprof_buf {
	u_int b_used;		/* number of samples in b_data */
	u_int b_size;		/* capacity of b_data, in samples */
	u_int b_overflow;	/* samples dropped because the buffer was full */
	u_int b_unused;		/* spare field (unused) */
	STAILQ_ENTRY(tprof_buf) b_list;
	tprof_sample_t b_data[];
} tprof_buf_t;
#define	TPROF_BUF_BYTESIZE(sz) \
	(sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t))
#define	TPROF_MAX_SAMPLES_PER_BUF	10000

#define	TPROF_MAX_BUF			100

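/*
 * Per-CPU profiling state: the buffer currently being filled by
 * tprof_sample(), the work structure used to hand it off to the worker,
 * and the callout that periodically kicks that worker.  The structure is
 * aligned to a cache line to avoid false sharing between CPUs.
 */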
typedef struct {
	tprof_buf_t *c_buf;
	struct work c_work;
	callout_t c_callout;
} __aligned(CACHE_LINE_SIZE) tprof_cpu_t;

/*
 * tprof_lock protects the run state, the worker count, the owner and the
 * global buffer list.
 */
static kmutex_t tprof_lock;
static bool tprof_running;
static u_int tprof_nworker;
static lwp_t *tprof_owner;
static STAILQ_HEAD(, tprof_buf) tprof_list;
static u_int tprof_nbuf_on_list;
static struct workqueue *tprof_wq;
static tprof_cpu_t tprof_cpus[MAXCPUS] __aligned(CACHE_LINE_SIZE);
static u_int tprof_samples_per_buf;

/* tprof_reader_lock serializes readers and protects tprof_reader_offset. */
static kmutex_t tprof_reader_lock;
static kcondvar_t tprof_reader_cv;
static off_t tprof_reader_offset;

/* tprof_startstop_lock serializes start/stop requests. */
static kmutex_t tprof_startstop_lock;
static kcondvar_t tprof_cv;

static struct tprof_stat tprof_stat;

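/*
 * tprof_cpu, tprof_curcpu: map a struct cpu_info (or the current CPU)
 * to its per-CPU tprof state.
 */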
static tprof_cpu_t *
tprof_cpu(struct cpu_info *ci)
{

	return &tprof_cpus[cpu_index(ci)];
}

static tprof_cpu_t *
tprof_curcpu(void)
{

	return tprof_cpu(curcpu());
}

static tprof_buf_t *
tprof_buf_alloc(void)
{
	tprof_buf_t *new;
	u_int size = tprof_samples_per_buf;

	new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP);
	new->b_used = 0;
	new->b_size = size;
	new->b_overflow = 0;
	return new;
}

static void
tprof_buf_free(tprof_buf_t *buf)
{

	kmem_free(buf, TPROF_BUF_BYTESIZE(buf->b_size));
}

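/*
 * tprof_buf_switch: install "new" as the current buffer for the given CPU
 * and return the previous one.  tprof_buf_refresh does the same for the
 * current CPU, allocating the replacement buffer itself.
 */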
static tprof_buf_t *
tprof_buf_switch(tprof_cpu_t *c, tprof_buf_t *new)
{
	tprof_buf_t *old;

	old = c->c_buf;
	c->c_buf = new;
	return old;
}

static tprof_buf_t *
tprof_buf_refresh(void)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *new;

	new = tprof_buf_alloc();
	return tprof_buf_switch(c, new);
}

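/*
 * tprof_worker: per-CPU worker, run from the workqueue roughly once per
 * second.  It swaps in a fresh sample buffer, hands the filled one to the
 * global list for read(2), and reschedules its callout unless profiling
 * has been stopped.  If the global list already holds TPROF_MAX_BUF
 * buffers, the filled buffer is dropped and accounted in the statistics.
 */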
static void
tprof_worker(struct work *wk, void *dummy)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *buf;
	bool shouldstop;

	KASSERT(wk == &c->c_work);
	KASSERT(dummy == NULL);

	/*
	 * swap in a fresh per-cpu buffer and take the filled one.
	 */
	buf = tprof_buf_refresh();

	/*
	 * and put it on the global list for read(2).
	 */
	mutex_enter(&tprof_lock);
	shouldstop = !tprof_running;
	if (shouldstop) {
		KASSERT(tprof_nworker > 0);
		tprof_nworker--;
		cv_broadcast(&tprof_cv);
		cv_broadcast(&tprof_reader_cv);
	}
	if (buf->b_used == 0) {
		tprof_stat.ts_emptybuf++;
	} else if (tprof_nbuf_on_list < TPROF_MAX_BUF) {
		tprof_stat.ts_sample += buf->b_used;
		tprof_stat.ts_overflow += buf->b_overflow;
		tprof_stat.ts_buf++;
		STAILQ_INSERT_TAIL(&tprof_list, buf, b_list);
		tprof_nbuf_on_list++;
		buf = NULL;
		cv_broadcast(&tprof_reader_cv);
	} else {
		tprof_stat.ts_dropbuf_sample += buf->b_used;
		tprof_stat.ts_dropbuf++;
	}
	mutex_exit(&tprof_lock);
	if (buf) {
		tprof_buf_free(buf);
	}
	if (!shouldstop) {
		callout_schedule(&c->c_callout, hz);
	}
}

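/*
 * tprof_kick: callout handler; queue the worker for the given CPU.
 */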
static void
tprof_kick(void *vp)
{
	struct cpu_info * const ci = vp;
	tprof_cpu_t * const c = tprof_cpu(ci);

	workqueue_enqueue(tprof_wq, &c->c_work, ci);
}

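/*
 * tprof_stop1: free the per-CPU buffers, destroy the callouts and the
 * workqueue.  Also used to clean up when tprof_start() fails partway.
 */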
static void
tprof_stop1(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(mutex_owned(&tprof_startstop_lock));

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *old;

		old = tprof_buf_switch(c, NULL);
		if (old != NULL) {
			tprof_buf_free(old);
		}
		callout_destroy(&c->c_callout);
	}
	workqueue_destroy(tprof_wq);
}

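/*
 * tprof_start: start profiling.  Size the per-CPU buffers from the
 * backend's estimated sample frequency, create the workqueue and the
 * per-CPU callouts, start the backend and kick one worker per CPU.
 */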
static int
tprof_start(const struct tprof_param *param)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int error;
	uint64_t freq;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	if (tprof_running) {
		error = EBUSY;
		goto done;
	}

	freq = tprof_backend_estimate_freq();
	tprof_samples_per_buf = MIN(freq * 2, TPROF_MAX_SAMPLES_PER_BUF);

	error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker, NULL,
	    PRI_NONE, PRI_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
	if (error != 0) {
		goto done;
	}

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *new;
		tprof_buf_t *old;

		new = tprof_buf_alloc();
		old = tprof_buf_switch(c, new);
		if (old != NULL) {
			tprof_buf_free(old);
		}
		callout_init(&c->c_callout, CALLOUT_MPSAFE);
		callout_setfunc(&c->c_callout, tprof_kick, ci);
	}

	error = tprof_backend_start();
	if (error != 0) {
		tprof_stop1();
		goto done;
	}

	mutex_enter(&tprof_lock);
	tprof_running = true;
	mutex_exit(&tprof_lock);
	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);

		mutex_enter(&tprof_lock);
		tprof_nworker++;
		mutex_exit(&tprof_lock);
		workqueue_enqueue(tprof_wq, &c->c_work, ci);
	}
done:
	return error;
}

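/*
 * tprof_stop: stop profiling.  Stop the backend, clear the running flag
 * so the workers retire themselves, wait until every worker has done so,
 * then tear down the buffers, callouts and workqueue via tprof_stop1().
 */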
static void
tprof_stop(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	if (!tprof_running) {
		goto done;
	}

	tprof_backend_stop();

	mutex_enter(&tprof_lock);
	tprof_running = false;
	cv_broadcast(&tprof_reader_cv);
	mutex_exit(&tprof_lock);

	for (CPU_INFO_FOREACH(cii, ci)) {
		mutex_enter(&tprof_lock);
		while (tprof_nworker > 0) {
			cv_wait(&tprof_cv, &tprof_lock);
		}
		mutex_exit(&tprof_lock);
	}

	tprof_stop1();
done:
	;
}

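/*
 * tprof_clear: free any buffers still queued for read(2), reset the
 * reader offset and zero the statistics.
 */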
static void
tprof_clear(void)
{
	tprof_buf_t *buf;

	mutex_enter(&tprof_reader_lock);
	mutex_enter(&tprof_lock);
	while ((buf = STAILQ_FIRST(&tprof_list)) != NULL) {
		if (buf != NULL) {
			STAILQ_REMOVE_HEAD(&tprof_list, b_list);
			KASSERT(tprof_nbuf_on_list > 0);
			tprof_nbuf_on_list--;
			mutex_exit(&tprof_lock);
			tprof_buf_free(buf);
			mutex_enter(&tprof_lock);
		}
	}
	KASSERT(tprof_nbuf_on_list == 0);
	mutex_exit(&tprof_lock);
	tprof_reader_offset = 0;
	mutex_exit(&tprof_reader_lock);

	memset(&tprof_stat, 0, sizeof(tprof_stat));
}

/* -------------------- backend interfaces */

/*
 * tprof_sample: record a sample in the per-cpu buffer.
 *
 * be careful; this can be called in NMI context.
 * we are assuming that curcpu() is safe to use here.
 */

void
tprof_sample(const struct trapframe *tf)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t * const buf = c->c_buf;
	const uintptr_t pc = PC_REGS(tf);
	u_int idx;

	idx = buf->b_used;
	if (__predict_false(idx >= buf->b_size)) {
		buf->b_overflow++;
		return;
	}
	buf->b_data[idx].s_pc = pc;
	buf->b_used = idx + 1;
}

/* -------------------- cdevsw interfaces */

void tprofattach(int);

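/*
 * tprof_open: the device is exclusive; only minor 0 is supported and
 * only one lwp (the owner) may have it open at a time.
 */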
static int
tprof_open(dev_t dev, int flags, int type, struct lwp *l)
{

	if (minor(dev) != 0) {
		return EXDEV;
	}
	mutex_enter(&tprof_lock);
	if (tprof_owner != NULL) {
		mutex_exit(&tprof_lock);
		return EBUSY;
	}
	tprof_owner = curlwp;
	mutex_exit(&tprof_lock);

	return 0;
}

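/*
 * tprof_close: release ownership, stop profiling if it is still running
 * and discard any buffered samples.
 */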
static int
tprof_close(dev_t dev, int flags, int type, struct lwp *l)
{

	KASSERT(minor(dev) == 0);

	mutex_enter(&tprof_startstop_lock);
	mutex_enter(&tprof_lock);
	tprof_owner = NULL;
	mutex_exit(&tprof_lock);
	tprof_stop();
	tprof_clear();
	mutex_exit(&tprof_startstop_lock);

	return 0;
}

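/*
 * tprof_read: copy out samples from the buffers on the global list, oldest
 * first.  A partially consumed buffer is pushed back onto the head of the
 * list; tprof_reader_offset remembers how far into that buffer the reader
 * has progressed.  The read blocks while workers are still running and no
 * buffer is available, and returns 0 (EOF) once profiling has stopped and
 * the list is empty.
 */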
static int
tprof_read(dev_t dev, struct uio *uio, int flags)
{
	tprof_buf_t *buf;
	size_t bytes;
	size_t resid;
	size_t done;
	int error = 0;

	KASSERT(minor(dev) == 0);
	mutex_enter(&tprof_reader_lock);
	while (uio->uio_resid > 0 && error == 0) {
		/*
		 * take the first buffer from the list.
		 */
		mutex_enter(&tprof_lock);
		buf = STAILQ_FIRST(&tprof_list);
		if (buf == NULL) {
			if (tprof_nworker == 0) {
				mutex_exit(&tprof_lock);
				error = 0;
				break;
			}
			mutex_exit(&tprof_reader_lock);
			error = cv_wait_sig(&tprof_reader_cv, &tprof_lock);
			mutex_exit(&tprof_lock);
			mutex_enter(&tprof_reader_lock);
			continue;
		}
		STAILQ_REMOVE_HEAD(&tprof_list, b_list);
		KASSERT(tprof_nbuf_on_list > 0);
		tprof_nbuf_on_list--;
		mutex_exit(&tprof_lock);

		/*
		 * copy it out.
		 */
		bytes = MIN(buf->b_used * sizeof(tprof_sample_t) -
		    tprof_reader_offset, uio->uio_resid);
		resid = uio->uio_resid;
		error = uiomove((char *)buf->b_data + tprof_reader_offset,
		    bytes, uio);
		done = resid - uio->uio_resid;
		tprof_reader_offset += done;

		/*
		 * if we didn't consume the whole buffer,
		 * put it back on the list.
		 */
		if (tprof_reader_offset <
		    buf->b_used * sizeof(tprof_sample_t)) {
			mutex_enter(&tprof_lock);
			STAILQ_INSERT_HEAD(&tprof_list, buf, b_list);
			tprof_nbuf_on_list++;
			cv_broadcast(&tprof_reader_cv);
			mutex_exit(&tprof_lock);
		} else {
			tprof_buf_free(buf);
			tprof_reader_offset = 0;
		}
	}
	mutex_exit(&tprof_reader_lock);

	return error;
}

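/*
 * tprof_ioctl: control interface; report the ABI version, start and stop
 * profiling, and export the statistics.
 */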
static int
tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
{
	const struct tprof_param *param;
	int error = 0;

	KASSERT(minor(dev) == 0);

	switch (cmd) {
	case TPROF_IOC_GETVERSION:
		*(int *)data = TPROF_VERSION;
		break;
	case TPROF_IOC_START:
		param = data;
		mutex_enter(&tprof_startstop_lock);
		error = tprof_start(param);
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_STOP:
		mutex_enter(&tprof_startstop_lock);
		tprof_stop();
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_GETSTAT:
		mutex_enter(&tprof_lock);
		memcpy(data, &tprof_stat, sizeof(tprof_stat));
		mutex_exit(&tprof_lock);
		break;
	default:
		error = EINVAL;
		break;
	}

	return error;
}

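/*
 * For reference, a userland profiler is expected to drive this interface
 * roughly as sketched below.  This is only an illustration: the device
 * node name and the contents of struct tprof_param are assumptions here
 * and are defined outside this file (see tprof_ioctl.h).
 *
 *	fd = open("/dev/tprof", O_RDONLY);          // assumed device node
 *	ioctl(fd, TPROF_IOC_GETVERSION, &version);  // check TPROF_VERSION
 *	ioctl(fd, TPROF_IOC_START, &param);         // struct tprof_param
 *	while ((n = read(fd, samples, sizeof(samples))) > 0)
 *		...;                                // tprof_sample_t records
 *	ioctl(fd, TPROF_IOC_STOP, NULL);
 *	ioctl(fd, TPROF_IOC_GETSTAT, &stat);        // struct tprof_stat
 */
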
const struct cdevsw tprof_cdevsw = {
	.d_open = tprof_open,
	.d_close = tprof_close,
	.d_read = tprof_read,
	.d_write = nowrite,
	.d_ioctl = tprof_ioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_flag = D_OTHER | D_MPSAFE,
};

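/*
 * tprofattach: pseudo-device attach; initialize the locks, condition
 * variables and the global buffer list.
 */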
void
tprofattach(int nunits)
{

	mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&tprof_startstop_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&tprof_cv, "tprof");
	cv_init(&tprof_reader_cv, "tprofread");
	STAILQ_INIT(&tprof_list);
}