tprof_analyze.c revision 1.3 1 /* $NetBSD: tprof_analyze.c,v 1.3 2018/07/14 07:54:04 maxv Exp $ */
2
3 /*
4 * Copyright (c) 2010,2011,2012 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 #ifndef lint
31 __RCSID("$NetBSD: tprof_analyze.c,v 1.3 2018/07/14 07:54:04 maxv Exp $");
32 #endif /* not lint */
33
34 #include <assert.h>
35 #include <err.h>
36 #include <errno.h>
37 #include <fcntl.h>
38 #include <gelf.h>
39 #include <inttypes.h>
40 #include <libelf.h>
41 #include <stdbool.h>
42 #include <stdlib.h>
43 #include <stdio.h>
44 #include <unistd.h>
45 #include <string.h>
46 #include <util.h>
47 #include <dev/tprof/tprof_ioctl.h>
48 #include "tprof.h"
49
50 #define _PATH_KSYMS "/dev/ksyms"
51
52 #include <sys/rbtree.h>
53
54 static bool filter_by_pid;
55 static pid_t target_pid;
56 static bool per_symbol;
57
58 struct addr {
59 struct rb_node node;
60 uint64_t addr; /* address */
61 uint32_t pid; /* process id */
62 uint32_t lwpid; /* lwp id */
63 uint32_t cpuid; /* cpu id */
64 bool in_kernel; /* if addr is in the kernel address space */
65 unsigned int nsamples; /* number of samples taken for the address */
66 };
67
68 static rb_tree_t addrtree;
69
70 struct sym {
71 char *name;
72 uint64_t value;
73 uint64_t size;
74 };
75
76 static struct sym **syms = NULL;
77 static size_t nsyms = 0;
78
79 static int
80 compare_value(const void *p1, const void *p2)
81 {
82 const struct sym *s1 = *(const struct sym * const *)p1;
83 const struct sym *s2 = *(const struct sym * const *)p2;
84
85 if (s1->value > s2->value) {
86 return -1;
87 } else if (s1->value < s2->value) {
88 return 1;
89 }
90 /*
91 * to produce a stable result, it's better not to return 0
92 * even for __strong_alias.
93 */
94 if (s1->size > s2->size) {
95 return -1;
96 } else if (s1->size < s2->size) {
97 return 1;
98 }
99 return strcmp(s1->name, s2->name);
100 }
101
102 static void
103 ksymload(void)
104 {
105 Elf *e;
106 Elf_Scn *s;
107 GElf_Shdr sh_store;
108 GElf_Shdr *sh;
109 Elf_Data *d;
110 int fd;
111 size_t size, i;
112
113 fd = open(_PATH_KSYMS, O_RDONLY);
114 if (fd == -1) {
115 err(EXIT_FAILURE, "open");
116 }
117 if (elf_version(EV_CURRENT) == EV_NONE) {
118 goto elffail;
119 }
120 e = elf_begin(fd, ELF_C_READ, NULL);
121 if (e == NULL) {
122 goto elffail;
123 }
124 for (s = elf_nextscn(e, NULL); s != NULL; s = elf_nextscn(e, s)) {
125 sh = gelf_getshdr(s, &sh_store);
126 if (sh == NULL) {
127 goto elffail;
128 }
129 if (sh->sh_type == SHT_SYMTAB) {
130 break;
131 }
132 }
133 if (s == NULL) {
134 errx(EXIT_FAILURE, "no symtab");
135 }
136 d = elf_getdata(s, NULL);
137 if (d == NULL) {
138 goto elffail;
139 }
140 assert(sh->sh_size == d->d_size);
141 size = sh->sh_size / sh->sh_entsize;
142 for (i = 1; i < size; i++) {
143 GElf_Sym st_store;
144 GElf_Sym *st;
145 struct sym *sym;
146
147 st = gelf_getsym(d, (int)i, &st_store);
148 if (st == NULL) {
149 goto elffail;
150 }
151 if (ELF_ST_TYPE(st->st_info) != STT_FUNC) {
152 continue;
153 }
154 sym = emalloc(sizeof(*sym));
155 sym->name = estrdup(elf_strptr(e, sh->sh_link, st->st_name));
156 sym->value = (uint64_t)st->st_value;
157 sym->size = st->st_size;
158 nsyms++;
159 syms = erealloc(syms, sizeof(*syms) * nsyms);
160 syms[nsyms - 1] = sym;
161 }
162 qsort(syms, nsyms, sizeof(*syms), compare_value);
163 return;
164 elffail:
165 errx(EXIT_FAILURE, "libelf: %s", elf_errmsg(elf_errno()));
166 }
167
168 static const char *
169 ksymlookup(uint64_t value, uint64_t *offset)
170 {
171 size_t hi;
172 size_t lo;
173 size_t i;
174
175 /*
176 * try to find the smallest i for which syms[i]->value <= value.
177 * syms[] is ordered by syms[]->value in the descending order.
178 */
179
180 hi = nsyms - 1;
181 lo = 0;
182 while (lo < hi) {
183 const size_t mid = (lo + hi) / 2;
184 const struct sym *sym = syms[mid];
185
186 assert(syms[lo]->value >= sym->value);
187 assert(sym->value >= syms[hi]->value);
188 if (sym->value <= value) {
189 hi = mid;
190 continue;
191 }
192 lo = mid + 1;
193 }
194 assert(lo == nsyms - 1 || syms[lo]->value <= value);
195 assert(lo == 0 || syms[lo - 1]->value > value);
196 for (i = lo; i < nsyms; i++) {
197 const struct sym *sym = syms[i];
198
199 if (sym->value <= value &&
200 (sym->size == 0 || value - sym->value <= sym->size )) {
201 *offset = value - sym->value;
202 return sym->name;
203 }
204 if (sym->size != 0 && sym->value + sym->size < value) {
205 break;
206 }
207 }
208 return NULL;
209 }
210
211 static signed int
212 addrtree_compare_key(void *ctx, const void *n1, const void *keyp)
213 {
214 const struct addr *a1 = n1;
215 const struct addr *a2 = (const struct addr *)keyp;
216
217 if (a1->addr > a2->addr) {
218 return 1;
219 } else if (a1->addr < a2->addr) {
220 return -1;
221 }
222 if (a1->pid > a2->pid) {
223 return -1;
224 } else if (a1->pid < a2->pid) {
225 return 1;
226 }
227 if (a1->lwpid > a2->lwpid) {
228 return -1;
229 } else if (a1->lwpid < a2->lwpid) {
230 return 1;
231 }
232 if (a1->cpuid > a2->cpuid) {
233 return -1;
234 } else if (a1->cpuid < a2->cpuid) {
235 return 1;
236 }
237 if (a1->in_kernel > a2->in_kernel) {
238 return -1;
239 } else if (a1->in_kernel < a2->in_kernel) {
240 return 1;
241 }
242 return 0;
243 }
244
245 static signed int
246 addrtree_compare_nodes(void *ctx, const void *n1, const void *n2)
247 {
248 const struct addr *a2 = n2;
249
250 return addrtree_compare_key(ctx, n1, a2);
251 }
252
253 static const rb_tree_ops_t addrtree_ops = {
254 .rbto_compare_nodes = addrtree_compare_nodes,
255 .rbto_compare_key = addrtree_compare_key,
256 };
257
258 static int
259 compare_nsamples(const void *p1, const void *p2)
260 {
261 const struct addr *a1 = *(const struct addr * const *)p1;
262 const struct addr *a2 = *(const struct addr * const *)p2;
263
264 if (a1->nsamples > a2->nsamples) {
265 return -1;
266 } else if (a1->nsamples < a2->nsamples) {
267 return 1;
268 }
269 return 0;
270 }
271
272 void
273 tprof_analyze(int argc, char **argv)
274 {
275 struct addr *a;
276 struct addr **l;
277 struct addr **p;
278 size_t naddrs, nsamples, i;
279 float perc;
280 int ch;
281 bool distinguish_processes = true;
282 bool distinguish_cpus = true;
283 bool distinguish_lwps = true;
284 bool kernel_only = false;
285 extern char *optarg;
286 extern int optind;
287 FILE *f;
288
289 while ((ch = getopt(argc, argv, "CkLPp:s")) != -1) {
290 uintmax_t val;
291 char *ep;
292
293 switch (ch) {
294 case 'C': /* don't distinguish cpus */
295 distinguish_cpus = false;
296 break;
297 case 'k': /* kernel only */
298 kernel_only = true;
299 break;
300 case 'L': /* don't distinguish lwps */
301 distinguish_lwps = false;
302 break;
303 case 'p': /* only for the process for the given pid */
304 errno = 0;
305 val = strtoumax(optarg, &ep, 10);
306 if (optarg[0] == 0 || *ep != 0 ||
307 val > INT32_MAX) {
308 errx(EXIT_FAILURE, "invalid p option");
309 }
310 target_pid = val;
311 filter_by_pid = true;
312 break;
313 case 'P': /* don't distinguish processes */
314 distinguish_processes = false;
315 break;
316 case 's': /* per symbol */
317 per_symbol = true;
318 break;
319 default:
320 exit(EXIT_FAILURE);
321 }
322 }
323 argc -= optind;
324 argv += optind;
325
326 if (argc == 0) {
327 errx(EXIT_FAILURE, "missing file name");
328 }
329
330 f = fopen(argv[0], "rb");
331 if (f == NULL) {
332 errx(EXIT_FAILURE, "fopen");
333 }
334
335 ksymload();
336 rb_tree_init(&addrtree, &addrtree_ops);
337
338 /*
339 * read and count samples.
340 */
341
342 naddrs = 0;
343 nsamples = 0;
344 while (/*CONSTCOND*/true) {
345 struct addr *o;
346 tprof_sample_t sample;
347 size_t n = fread(&sample, sizeof(sample), 1, f);
348 bool in_kernel;
349
350 if (n == 0) {
351 if (feof(f)) {
352 break;
353 }
354 if (ferror(f)) {
355 err(EXIT_FAILURE, "fread");
356 }
357 }
358 if (filter_by_pid && (pid_t)sample.s_pid != target_pid) {
359 continue;
360 }
361 in_kernel = (sample.s_flags & TPROF_SAMPLE_INKERNEL) != 0;
362 if (kernel_only && !in_kernel) {
363 continue;
364 }
365 a = emalloc(sizeof(*a));
366 a->addr = (uint64_t)sample.s_pc;
367 if (distinguish_processes) {
368 a->pid = sample.s_pid;
369 } else {
370 a->pid = 0;
371 }
372 if (distinguish_lwps) {
373 a->lwpid = sample.s_lwpid;
374 } else {
375 a->lwpid = 0;
376 }
377 if (distinguish_cpus) {
378 a->cpuid = sample.s_cpuid;
379 } else {
380 a->cpuid = 0;
381 }
382 a->in_kernel = in_kernel;
383 if (per_symbol) {
384 const char *name;
385 uint64_t offset;
386
387 name = ksymlookup(a->addr, &offset);
388 if (name != NULL) {
389 a->addr -= offset;
390 }
391 }
392 a->nsamples = 1;
393 o = rb_tree_insert_node(&addrtree, a);
394 if (o != a) {
395 assert(a->addr == o->addr);
396 assert(a->pid == o->pid);
397 assert(a->lwpid == o->lwpid);
398 assert(a->cpuid == o->cpuid);
399 assert(a->in_kernel == o->in_kernel);
400 free(a);
401 o->nsamples++;
402 } else {
403 naddrs++;
404 }
405 nsamples++;
406 }
407
408 /*
409 * sort samples by addresses.
410 */
411
412 l = emalloc(naddrs * sizeof(*l));
413 p = l;
414 RB_TREE_FOREACH(a, &addrtree) {
415 *p++ = a;
416 }
417 assert(l + naddrs == p);
418 qsort(l, naddrs, sizeof(*l), compare_nsamples);
419
420 /*
421 * print addresses and number of samples, preferably with
422 * resolved symbol names.
423 */
424 printf("File: %s\n", argv[0]);
425 printf("Number of samples: %zu\n\n", nsamples);
426 printf("percentage nsamples pid lwp cpu k address symbol\n");
427 printf("------------ -------- ------ ---- ---- - ---------------- ------\n");
428 for (i = 0; i < naddrs; i++) {
429 const char *name;
430 char buf[100];
431 uint64_t offset;
432
433 a = l[i];
434 if (a->in_kernel) {
435 name = ksymlookup(a->addr, &offset);
436 } else {
437 name = NULL;
438 }
439 if (name == NULL) {
440 (void)snprintf(buf, sizeof(buf), "<%016" PRIx64 ">",
441 a->addr);
442 name = buf;
443 } else if (offset != 0) {
444 (void)snprintf(buf, sizeof(buf), "%s+0x%" PRIx64, name,
445 offset);
446 name = buf;
447 }
448
449 perc = ((float)a->nsamples / (float)nsamples) * 100.0;
450
451 printf("%11f%% %8u %6" PRIu32 " %4" PRIu32 " %4" PRIu32 " %u %016"
452 PRIx64 " %s\n",
453 perc,
454 a->nsamples, a->pid, a->lwpid, a->cpuid, a->in_kernel,
455 a->addr, name);
456 }
457
458 fclose(f);
459 }
460