tprof_x86.c revision 1.2 1 1.2 maxv /* $NetBSD: tprof_x86.c,v 1.2 2018/07/13 08:09:21 maxv Exp $ */
2 1.1 maxv
3 1.1 maxv /*
4 1.1 maxv * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 1.1 maxv * All rights reserved.
6 1.1 maxv *
7 1.1 maxv * This code is derived from software contributed to The NetBSD Foundation
8 1.1 maxv * by Maxime Villard.
9 1.1 maxv *
10 1.1 maxv * Redistribution and use in source and binary forms, with or without
11 1.1 maxv * modification, are permitted provided that the following conditions
12 1.1 maxv * are met:
13 1.1 maxv * 1. Redistributions of source code must retain the above copyright
14 1.1 maxv * notice, this list of conditions and the following disclaimer.
15 1.1 maxv * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 maxv * notice, this list of conditions and the following disclaimer in the
17 1.1 maxv * documentation and/or other materials provided with the distribution.
18 1.1 maxv *
19 1.1 maxv * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.1 maxv * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.1 maxv * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.1 maxv * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.1 maxv * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.1 maxv * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.1 maxv * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.1 maxv * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.1 maxv * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.1 maxv * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.1 maxv * POSSIBILITY OF SUCH DAMAGE.
30 1.1 maxv */
31 1.1 maxv
32 1.1 maxv #include <sys/cdefs.h>
33 1.1 maxv #include <stdio.h>
34 1.1 maxv #include <stdlib.h>
35 1.1 maxv #include <stdbool.h>
36 1.1 maxv #include <string.h>
37 1.1 maxv #include <unistd.h>
38 1.1 maxv #include <err.h>
39 1.1 maxv #include <machine/specialreg.h>
40 1.1 maxv #include <dev/tprof/tprof_ioctl.h>
41 1.1 maxv #include "../tprof.h"
42 1.1 maxv
/*
 * Event interface implemented by this file: set up the event tables for
 * the identified CPU, print the known events, and translate an event name
 * into the values stored in a struct tprof_param.
 */
int	tprof_event_init(uint32_t ident);
void	tprof_event_list(void);
void	tprof_event_lookup(const char *name, struct tprof_param *param);
46 1.1 maxv
/*
 * One named event.  The tables in this file initialize these positionally,
 * so the member order must not change.
 */
struct name_to_event {
	const char *name;	/* string printed by the list code and matched by lookup */
	uint64_t event;		/* copied to tprof_param.p_event on a successful lookup */
	uint64_t unit;		/* copied to tprof_param.p_unit on a successful lookup */
	bool enabled;		/* false: entry is skipped by both list and lookup */
};
53 1.1 maxv
/* A named group of events.  Groups are chained together through 'next'. */
struct event_table {
	const char *tablename;		/* heading printed above the group's events */
	struct name_to_event *names;	/* array holding 'nevents' entries */
	size_t nevents;			/* number of entries in names[] */
	struct event_table *next;	/* following group, or NULL for the last one */
};
60 1.1 maxv
/*
 * Head of the event-table chain used by the list and lookup entry points.
 * Static storage starts out as a null pointer; tprof_event_init() assigns it.
 */
static struct event_table *cpuevents;
62 1.1 maxv
/*
 * Execute the CPUID instruction.
 *
 * On entry, *eax and *ecx supply the values placed in EAX and ECX.  On
 * return, the four pointed-to variables receive the resulting EAX, EBX,
 * ECX and EDX.  The incoming *ebx and *edx values are not handed to the
 * instruction.
 */
static void x86_cpuid(unsigned int *eax, unsigned int *ebx,
    unsigned int *ecx, unsigned int *edx)
{
	asm volatile("cpuid"
	    : "=a" (*eax),
	      "=b" (*ebx),
	      "=c" (*ecx),
	      "=d" (*edx)
	    /* "0" and "2" tie the inputs to the EAX and ECX output operands. */
	    : "0" (*eax), "2" (*ecx));
}
73 1.1 maxv
74 1.1 maxv /* -------------------------------------------------------------------------- */
75 1.1 maxv
76 1.1 maxv /*
77 1.1 maxv * Intel Architectural Version 1.
78 1.1 maxv */
79 1.1 maxv static struct name_to_event intel_arch1_names[] = {
80 1.1 maxv /* Event Name - Event Select - UMask */
81 1.1 maxv { "unhalted-core-cycles", 0x3C, 0x00, true },
82 1.1 maxv { "instruction-retired", 0xC0, 0x00, true },
83 1.1 maxv { "unhalted-reference-cycles", 0x3C, 0x01, true },
84 1.1 maxv { "llc-reference", 0x2E, 0x4F, true },
85 1.1 maxv { "llc-misses", 0x2E, 0x41, true },
86 1.1 maxv { "branch-instruction-retired", 0xC4, 0x00, true },
87 1.1 maxv { "branch-misses-retired", 0xC5, 0x00, true },
88 1.1 maxv };
89 1.1 maxv
90 1.1 maxv static struct event_table intel_arch1 = {
91 1.1 maxv .tablename = "Intel Architectural Version 1",
92 1.1 maxv .names = intel_arch1_names,
93 1.1 maxv .nevents = sizeof(intel_arch1_names) /
94 1.1 maxv sizeof(struct name_to_event),
95 1.1 maxv .next = NULL
96 1.1 maxv };
97 1.1 maxv
98 1.1 maxv static struct event_table *
99 1.1 maxv init_intel_arch1(void)
100 1.1 maxv {
101 1.1 maxv unsigned int eax, ebx, ecx, edx;
102 1.1 maxv struct event_table *table;
103 1.1 maxv size_t i;
104 1.1 maxv
105 1.1 maxv eax = 0x0A;
106 1.1 maxv ebx = 0;
107 1.1 maxv ecx = 0;
108 1.1 maxv edx = 0;
109 1.1 maxv x86_cpuid(&eax, &ebx, &ecx, &edx);
110 1.1 maxv
111 1.1 maxv table = &intel_arch1;
112 1.1 maxv for (i = 0; i < table->nevents; i++) {
113 1.1 maxv /* Disable the unsupported events. */
114 1.1 maxv if ((ebx & (i << 1)) != 0)
115 1.1 maxv table->names[i].enabled = false;
116 1.1 maxv }
117 1.1 maxv
118 1.1 maxv return table;
119 1.1 maxv }
120 1.1 maxv
121 1.1 maxv /*
122 1.1 maxv * Intel Skylake/Kabylake. TODO: there are many more events available.
123 1.1 maxv */
124 1.1 maxv static struct name_to_event intel_skylake_kabylake_names[] = {
125 1.1 maxv /* Event Name - Event Select - UMask */
126 1.1 maxv { "itlb-misses-causes-a-walk", 0x85, 0x01, true },
127 1.1 maxv };
128 1.1 maxv
129 1.1 maxv static struct event_table intel_skylake_kabylake = {
130 1.1 maxv .tablename = "Intel Skylake/Kabylake",
131 1.1 maxv .names = intel_skylake_kabylake_names,
132 1.1 maxv .nevents = sizeof(intel_skylake_kabylake_names) /
133 1.1 maxv sizeof(struct name_to_event),
134 1.1 maxv .next = NULL
135 1.1 maxv };
136 1.1 maxv
137 1.1 maxv static struct event_table *
138 1.1 maxv init_intel_skylake_kabylake(void)
139 1.1 maxv {
140 1.1 maxv return &intel_skylake_kabylake;
141 1.1 maxv }
142 1.1 maxv
143 1.1 maxv static struct event_table *
144 1.1 maxv init_intel_generic(void)
145 1.1 maxv {
146 1.1 maxv unsigned int eax, ebx, ecx, edx;
147 1.1 maxv struct event_table *table;
148 1.1 maxv
149 1.1 maxv /*
150 1.1 maxv * The kernel made sure the Architectural Version 1 PMCs were
151 1.1 maxv * present.
152 1.1 maxv */
153 1.1 maxv table = init_intel_arch1();
154 1.1 maxv
155 1.1 maxv /*
156 1.1 maxv * Now query the additional (non-architectural) events. They
157 1.1 maxv * depend on the CPU model.
158 1.1 maxv */
159 1.1 maxv eax = 0x01;
160 1.1 maxv ebx = 0;
161 1.1 maxv ecx = 0;
162 1.1 maxv edx = 0;
163 1.1 maxv x86_cpuid(&eax, &ebx, &ecx, &edx);
164 1.1 maxv
165 1.1 maxv switch (CPUID_TO_MODEL(eax)) {
166 1.1 maxv case 0x4E: /* Skylake */
167 1.1 maxv case 0x5E: /* Skylake */
168 1.1 maxv case 0x8E: /* Kabylake */
169 1.1 maxv case 0x9E: /* Kabylake */
170 1.1 maxv table->next = init_intel_skylake_kabylake();
171 1.1 maxv break;
172 1.1 maxv }
173 1.1 maxv
174 1.1 maxv return table;
175 1.1 maxv }
176 1.1 maxv
177 1.1 maxv /* -------------------------------------------------------------------------- */
178 1.1 maxv
/*
 * AMD Family 10h
 *
 * Columns, in struct name_to_event member order: name, event, unit,
 * enabled.  Several names share one event value and differ only in the
 * unit value.
 */
static struct name_to_event amd_f10h_names[] = {
	{ "seg-load-all", 0x20, 0x7f, true },
	{ "seg-load-es", 0x20, 0x01, true },
	{ "seg-load-cs", 0x20, 0x02, true },
	{ "seg-load-ss", 0x20, 0x04, true },
	{ "seg-load-ds", 0x20, 0x08, true },
	{ "seg-load-fs", 0x20, 0x10, true },
	{ "seg-load-gs", 0x20, 0x20, true },
	{ "seg-load-hs", 0x20, 0x40, true },
	{ "l1cache-access", 0x40, 0x00, true },
	{ "l1cache-miss", 0x41, 0x00, true },
	{ "l1cache-refill", 0x42, 0x1f, true },
	{ "l1cache-refill-invalid", 0x42, 0x01, true },
	{ "l1cache-refill-shared", 0x42, 0x02, true },
	{ "l1cache-refill-exclusive", 0x42, 0x04, true },
	{ "l1cache-refill-owner", 0x42, 0x08, true },
	{ "l1cache-refill-modified", 0x42, 0x10, true },
	{ "l1cache-load", 0x43, 0x1f, true },
	{ "l1cache-load-invalid", 0x43, 0x01, true },
	{ "l1cache-load-shared", 0x43, 0x02, true },
	{ "l1cache-load-exclusive", 0x43, 0x04, true },
	{ "l1cache-load-owner", 0x43, 0x08, true },
	{ "l1cache-load-modified", 0x43, 0x10, true },
	{ "l1cache-writeback", 0x44, 0x1f, true },
	{ "l1cache-writeback-invalid", 0x44, 0x01, true },
	{ "l1cache-writeback-shared", 0x44, 0x02, true },
	{ "l1cache-writeback-exclusive",0x44, 0x04, true },
	{ "l1cache-writeback-owner", 0x44, 0x08, true },
	{ "l1cache-writeback-modified", 0x44, 0x10, true },
	{ "l1DTLB-hit-all", 0x4D, 0x07, true },
	{ "l1DTLB-hit-4Kpage", 0x4D, 0x01, true },
	{ "l1DTLB-hit-2Mpage", 0x4D, 0x02, true },
	{ "l1DTLB-hit-1Gpage", 0x4D, 0x04, true },
	{ "l1DTLB-miss-all", 0x45, 0x07, true },
	{ "l1DTLB-miss-4Kpage", 0x45, 0x01, true },
	{ "l1DTLB-miss-2Mpage", 0x45, 0x02, true },
	{ "l1DTLB-miss-1Gpage", 0x45, 0x04, true },
	{ "l2DTLB-miss-all", 0x46, 0x03, true },
	{ "l2DTLB-miss-4Kpage", 0x46, 0x01, true },
	{ "l2DTLB-miss-2Mpage", 0x46, 0x02, true },
	/* l2DTLB-miss-1Gpage: reserved on some revisions, so disabled */
	{ "l1ITLB-miss", 0x84, 0x00, true },
	{ "l2ITLB-miss-all", 0x85, 0x03, true },
	{ "l2ITLB-miss-4Kpage", 0x85, 0x01, true },
	{ "l2ITLB-miss-2Mpage", 0x85, 0x02, true },
	{ "mem-misalign-ref", 0x47, 0x00, true },
	{ "ins-fetch", 0x80, 0x00, true },
	{ "ins-fetch-miss", 0x81, 0x00, true },
	{ "ins-refill-l2", 0x82, 0x00, true },
	{ "ins-refill-sys", 0x83, 0x00, true },
	{ "ins-fetch-stall", 0x87, 0x00, true },
	{ "ins-retired", 0xC0, 0x00, true },
	{ "ins-empty", 0xD0, 0x00, true },
	{ "ops-retired", 0xC1, 0x00, true },
	{ "branch-retired", 0xC2, 0x00, true },
	{ "branch-miss-retired", 0xC3, 0x00, true },
	{ "branch-taken-retired", 0xC4, 0x00, true },
	{ "branch-taken-miss-retired", 0xC5, 0x00, true },
	{ "branch-far-retired", 0xC6, 0x00, true },
	{ "branch-resync-retired", 0xC7, 0x00, true },
	{ "branch-near-retired", 0xC8, 0x00, true },
	{ "branch-near-miss-retired", 0xC9, 0x00, true },
	{ "branch-indirect-miss-retired", 0xCA, 0x00, true },
	{ "int-hw", 0xCF, 0x00, true },
	{ "int-cycles-masked", 0xCD, 0x00, true },
	{ "int-cycles-masked-pending", 0xCE, 0x00, true },
	{ "fpu-exceptions", 0xDB, 0x00, true },
	{ "break-match0", 0xDC, 0x00, true },
	{ "break-match1", 0xDD, 0x00, true },
	{ "break-match2", 0xDE, 0x00, true },
	{ "break-match3", 0xDF, 0x00, true },
};
254 1.1 maxv
255 1.1 maxv static struct event_table amd_f10h = {
256 1.1 maxv .tablename = "AMD Family 10h",
257 1.1 maxv .names = amd_f10h_names,
258 1.1 maxv .nevents = sizeof(amd_f10h_names) /
259 1.1 maxv sizeof(struct name_to_event),
260 1.1 maxv .next = NULL
261 1.1 maxv };
262 1.1 maxv
263 1.1 maxv static struct event_table *
264 1.1 maxv init_amd_f10h(void)
265 1.1 maxv {
266 1.1 maxv return &amd_f10h;
267 1.1 maxv }
268 1.1 maxv
/*
 * Pick the event table for an AMD CPU from the family reported by CPUID
 * leaf 1.  Returns NULL when the family has no table in this file.
 */
static struct event_table *
init_amd_generic(void)
{
	unsigned int eax = 0x01, ebx = 0, ecx = 0, edx = 0;

	x86_cpuid(&eax, &ebx, &ecx, &edx);

	if (CPUID_TO_FAMILY(eax) == 0x10)
		return init_amd_f10h();

	return NULL;
}
287 1.1 maxv
288 1.1 maxv /* -------------------------------------------------------------------------- */
289 1.1 maxv
290 1.1 maxv int
291 1.1 maxv tprof_event_init(uint32_t ident)
292 1.1 maxv {
293 1.1 maxv switch (ident) {
294 1.1 maxv case TPROF_IDENT_NONE:
295 1.1 maxv return -1;
296 1.1 maxv case TPROF_IDENT_INTEL_GENERIC:
297 1.1 maxv cpuevents = init_intel_generic();
298 1.1 maxv break;
299 1.1 maxv case TPROF_IDENT_AMD_GENERIC:
300 1.1 maxv cpuevents = init_amd_generic();
301 1.1 maxv break;
302 1.1 maxv }
303 1.1 maxv return (cpuevents == NULL) ? -1 : 0;
304 1.1 maxv }
305 1.1 maxv
306 1.1 maxv static void
307 1.1 maxv recursive_event_list(struct event_table *table)
308 1.1 maxv {
309 1.1 maxv size_t i;
310 1.1 maxv
311 1.1 maxv printf("%s:\n", table->tablename);
312 1.1 maxv for (i = 0; i < table->nevents; i++) {
313 1.1 maxv if (!table->names[i].enabled)
314 1.1 maxv continue;
315 1.1 maxv printf("\t%s\n", table->names[i].name);
316 1.1 maxv }
317 1.1 maxv printf("\n");
318 1.1 maxv
319 1.1 maxv if (table->next != NULL) {
320 1.1 maxv recursive_event_list(table->next);
321 1.1 maxv }
322 1.1 maxv }
323 1.1 maxv
324 1.1 maxv void
325 1.1 maxv tprof_event_list(void)
326 1.1 maxv {
327 1.1 maxv recursive_event_list(cpuevents);
328 1.1 maxv }
329 1.1 maxv
330 1.1 maxv static void
331 1.1 maxv recursive_event_lookup(struct event_table *table, const char *name,
332 1.1 maxv struct tprof_param *param)
333 1.1 maxv {
334 1.1 maxv size_t i;
335 1.1 maxv
336 1.1 maxv for (i = 0; i < table->nevents; i++) {
337 1.1 maxv if (!table->names[i].enabled)
338 1.1 maxv continue;
339 1.1 maxv if (!strcmp(table->names[i].name, name)) {
340 1.1 maxv param->p_event = table->names[i].event;
341 1.1 maxv param->p_unit = table->names[i].unit;
342 1.1 maxv return;
343 1.1 maxv }
344 1.1 maxv }
345 1.1 maxv
346 1.1 maxv if (table->next != NULL) {
347 1.1 maxv recursive_event_lookup(table->next, name, param);
348 1.1 maxv } else {
349 1.1 maxv errx(EXIT_FAILURE, "event '%s' unknown", name);
350 1.1 maxv }
351 1.1 maxv }
352 1.1 maxv
353 1.1 maxv void
354 1.1 maxv tprof_event_lookup(const char *name, struct tprof_param *param)
355 1.1 maxv {
356 1.1 maxv recursive_event_lookup(cpuevents, name, param);
357 1.1 maxv }
358