/*	$NetBSD: tprof_x86.c,v 1.3 2018/07/13 09:53:42 maxv Exp $	*/
2
3 /*
4 * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <stdbool.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <err.h>
39 #include <machine/specialreg.h>
40 #include <dev/tprof/tprof_ioctl.h>
41 #include "../tprof.h"
42
/* Non-static entry points implemented further down in this file. */
int	tprof_event_init(uint32_t ident);
void	tprof_event_list(void);
void	tprof_event_lookup(const char *name, struct tprof_param *param);
46
/*
 * One selectable event. The tables in this file initialise entries
 * positionally, so the member order must not change.
 */
struct name_to_event {
	const char	*name;		/* name matched by the lookup code */
	uint64_t	 event;		/* event select value */
	uint64_t	 unit;		/* unit mask value */
	bool		 enabled;	/* false: skipped by list and lookup */
};
53
/*
 * A named group of events. Tables are chained through 'next'; the chain
 * ends with a NULL pointer.
 */
struct event_table {
	const char		*tablename;	/* heading printed when listing */
	struct name_to_event	*names;		/* array of 'nevents' entries */
	size_t			 nevents;	/* number of entries in 'names' */
	struct event_table	*next;		/* following table, or NULL */
};

/*
 * Head of the table chain for the running CPU. Static storage makes it
 * start out as NULL; tprof_event_init() assigns it.
 */
static struct event_table *cpuevents;
62
/*
 * Execute the CPUID instruction.
 *
 * On entry *eax and *ecx supply the values loaded into EAX and ECX
 * before the instruction runs; *ebx and *edx are not read. On return
 * the four locations hold the EAX, EBX, ECX and EDX values produced by
 * the instruction.
 *
 * The __asm__ spelling is used rather than the bare asm keyword so the
 * function also compiles in strict ISO C modes, where asm is not a
 * keyword.
 */
static void
x86_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx,
    unsigned int *edx)
{
	unsigned int a = *eax;
	unsigned int b;
	unsigned int c = *ecx;
	unsigned int d;

	/* EAX and ECX are both inputs and outputs; EBX and EDX outputs only. */
	__asm__ volatile("cpuid"
	    : "+a" (a), "=b" (b), "+c" (c), "=d" (d));

	*eax = a;
	*ebx = b;
	*ecx = c;
	*edx = d;
}
73
74 /* -------------------------------------------------------------------------- */
75
76 /*
77 * Intel Architectural Version 1.
78 */
79 static struct name_to_event intel_arch1_names[] = {
80 /* Event Name - Event Select - UMask */
81 { "unhalted-core-cycles", 0x3C, 0x00, true },
82 { "instruction-retired", 0xC0, 0x00, true },
83 { "unhalted-reference-cycles", 0x3C, 0x01, true },
84 { "llc-reference", 0x2E, 0x4F, true },
85 { "llc-misses", 0x2E, 0x41, true },
86 { "branch-instruction-retired", 0xC4, 0x00, true },
87 { "branch-misses-retired", 0xC5, 0x00, true },
88 };
89
90 static struct event_table intel_arch1 = {
91 .tablename = "Intel Architectural Version 1",
92 .names = intel_arch1_names,
93 .nevents = sizeof(intel_arch1_names) /
94 sizeof(struct name_to_event),
95 .next = NULL
96 };
97
98 static struct event_table *
99 init_intel_arch1(void)
100 {
101 unsigned int eax, ebx, ecx, edx;
102 struct event_table *table;
103 size_t i;
104
105 eax = 0x0A;
106 ebx = 0;
107 ecx = 0;
108 edx = 0;
109 x86_cpuid(&eax, &ebx, &ecx, &edx);
110
111 table = &intel_arch1;
112 for (i = 0; i < table->nevents; i++) {
113 /* Disable the unsupported events. */
114 if ((ebx & (i << 1)) != 0)
115 table->names[i].enabled = false;
116 }
117
118 return table;
119 }
120
121 /*
122 * Intel Skylake/Kabylake. TODO: there are many more events available.
123 */
124 static struct name_to_event intel_skylake_kabylake_names[] = {
125 /* Event Name - Event Select - UMask */
126 { "itlb-misses-causes-a-walk", 0x85, 0x01, true },
127 };
128
129 static struct event_table intel_skylake_kabylake = {
130 .tablename = "Intel Skylake/Kabylake",
131 .names = intel_skylake_kabylake_names,
132 .nevents = sizeof(intel_skylake_kabylake_names) /
133 sizeof(struct name_to_event),
134 .next = NULL
135 };
136
137 static struct event_table *
138 init_intel_skylake_kabylake(void)
139 {
140 return &intel_skylake_kabylake;
141 }
142
143 static struct event_table *
144 init_intel_generic(void)
145 {
146 unsigned int eax, ebx, ecx, edx;
147 struct event_table *table;
148
149 /*
150 * The kernel made sure the Architectural Version 1 PMCs were
151 * present.
152 */
153 table = init_intel_arch1();
154
155 /*
156 * Now query the additional (non-architectural) events. They
157 * depend on the CPU model.
158 */
159 eax = 0x01;
160 ebx = 0;
161 ecx = 0;
162 edx = 0;
163 x86_cpuid(&eax, &ebx, &ecx, &edx);
164
165 if (CPUID_TO_FAMILY(eax) == 6) {
166 switch (CPUID_TO_MODEL(eax)) {
167 case 0x4E: /* Skylake */
168 case 0x5E: /* Skylake */
169 case 0x8E: /* Kabylake */
170 case 0x9E: /* Kabylake */
171 table->next = init_intel_skylake_kabylake();
172 break;
173 }
174 }
175
176 return table;
177 }
178
179 /* -------------------------------------------------------------------------- */
180
181 /*
182 * AMD Family 10h
183 */
184 static struct name_to_event amd_f10h_names[] = {
185 { "seg-load-all", 0x20, 0x7f, true },
186 { "seg-load-es", 0x20, 0x01, true },
187 { "seg-load-cs", 0x20, 0x02, true },
188 { "seg-load-ss", 0x20, 0x04, true },
189 { "seg-load-ds", 0x20, 0x08, true },
190 { "seg-load-fs", 0x20, 0x10, true },
191 { "seg-load-gs", 0x20, 0x20, true },
192 { "seg-load-hs", 0x20, 0x40, true },
193 { "l1cache-access", 0x40, 0x00, true },
194 { "l1cache-miss", 0x41, 0x00, true },
195 { "l1cache-refill", 0x42, 0x1f, true },
196 { "l1cache-refill-invalid", 0x42, 0x01, true },
197 { "l1cache-refill-shared", 0x42, 0x02, true },
198 { "l1cache-refill-exclusive", 0x42, 0x04, true },
199 { "l1cache-refill-owner", 0x42, 0x08, true },
200 { "l1cache-refill-modified", 0x42, 0x10, true },
201 { "l1cache-load", 0x43, 0x1f, true },
202 { "l1cache-load-invalid", 0x43, 0x01, true },
203 { "l1cache-load-shared", 0x43, 0x02, true },
204 { "l1cache-load-exclusive", 0x43, 0x04, true },
205 { "l1cache-load-owner", 0x43, 0x08, true },
206 { "l1cache-load-modified", 0x43, 0x10, true },
207 { "l1cache-writeback", 0x44, 0x1f, true },
208 { "l1cache-writeback-invalid", 0x44, 0x01, true },
209 { "l1cache-writeback-shared", 0x44, 0x02, true },
210 { "l1cache-writeback-exclusive",0x44, 0x04, true },
211 { "l1cache-writeback-owner", 0x44, 0x08, true },
212 { "l1cache-writeback-modified", 0x44, 0x10, true },
213 { "l1DTLB-hit-all", 0x4D, 0x07, true },
214 { "l1DTLB-hit-4Kpage", 0x4D, 0x01, true },
215 { "l1DTLB-hit-2Mpage", 0x4D, 0x02, true },
216 { "l1DTLB-hit-1Gpage", 0x4D, 0x04, true },
217 { "l1DTLB-miss-all", 0x45, 0x07, true },
218 { "l1DTLB-miss-4Kpage", 0x45, 0x01, true },
219 { "l1DTLB-miss-2Mpage", 0x45, 0x02, true },
220 { "l1DTLB-miss-1Gpage", 0x45, 0x04, true },
221 { "l2DTLB-miss-all", 0x46, 0x03, true },
222 { "l2DTLB-miss-4Kpage", 0x46, 0x01, true },
223 { "l2DTLB-miss-2Mpage", 0x46, 0x02, true },
224 /* l2DTLB-miss-1Gpage: reserved on some revisions, so disabled */
225 { "l1ITLB-miss", 0x84, 0x00, true },
226 { "l2ITLB-miss-all", 0x85, 0x03, true },
227 { "l2ITLB-miss-4Kpage", 0x85, 0x01, true },
228 { "l2ITLB-miss-2Mpage", 0x85, 0x02, true },
229 { "mem-misalign-ref", 0x47, 0x00, true },
230 { "ins-fetch", 0x80, 0x00, true },
231 { "ins-fetch-miss", 0x81, 0x00, true },
232 { "ins-refill-l2", 0x82, 0x00, true },
233 { "ins-refill-sys", 0x83, 0x00, true },
234 { "ins-fetch-stall", 0x87, 0x00, true },
235 { "ins-retired", 0xC0, 0x00, true },
236 { "ins-empty", 0xD0, 0x00, true },
237 { "ops-retired", 0xC1, 0x00, true },
238 { "branch-retired", 0xC2, 0x00, true },
239 { "branch-miss-retired", 0xC3, 0x00, true },
240 { "branch-taken-retired", 0xC4, 0x00, true },
241 { "branch-taken-miss-retired", 0xC5, 0x00, true },
242 { "branch-far-retired", 0xC6, 0x00, true },
243 { "branch-resync-retired", 0xC7, 0x00, true },
244 { "branch-near-retired", 0xC8, 0x00, true },
245 { "branch-near-miss-retired", 0xC9, 0x00, true },
246 { "branch-indirect-miss-retired", 0xCA, 0x00, true },
247 { "int-hw", 0xCF, 0x00, true },
248 { "int-cycles-masked", 0xCD, 0x00, true },
249 { "int-cycles-masked-pending", 0xCE, 0x00, true },
250 { "fpu-exceptions", 0xDB, 0x00, true },
251 { "break-match0", 0xDC, 0x00, true },
252 { "break-match1", 0xDD, 0x00, true },
253 { "break-match2", 0xDE, 0x00, true },
254 { "break-match3", 0xDF, 0x00, true },
255 };
256
257 static struct event_table amd_f10h = {
258 .tablename = "AMD Family 10h",
259 .names = amd_f10h_names,
260 .nevents = sizeof(amd_f10h_names) /
261 sizeof(struct name_to_event),
262 .next = NULL
263 };
264
265 static struct event_table *
266 init_amd_f10h(void)
267 {
268 return &amd_f10h;
269 }
270
/*
 * Select the event table for an AMD CPU from the family reported by
 * CPUID leaf 0x01.
 *
 * Returns the table for the family, or NULL when this file has no table
 * for it (only family 0x10 is handled).
 */
static struct event_table *
init_amd_generic(void)
{
	unsigned int eax, ebx, ecx, edx;

	eax = 0x01;
	ebx = ecx = edx = 0;
	x86_cpuid(&eax, &ebx, &ecx, &edx);

	if (CPUID_TO_FAMILY(eax) == 0x10)
		return init_amd_f10h();

	return NULL;
}
289
290 /* -------------------------------------------------------------------------- */
291
292 int
293 tprof_event_init(uint32_t ident)
294 {
295 switch (ident) {
296 case TPROF_IDENT_NONE:
297 return -1;
298 case TPROF_IDENT_INTEL_GENERIC:
299 cpuevents = init_intel_generic();
300 break;
301 case TPROF_IDENT_AMD_GENERIC:
302 cpuevents = init_amd_generic();
303 break;
304 }
305 return (cpuevents == NULL) ? -1 : 0;
306 }
307
308 static void
309 recursive_event_list(struct event_table *table)
310 {
311 size_t i;
312
313 printf("%s:\n", table->tablename);
314 for (i = 0; i < table->nevents; i++) {
315 if (!table->names[i].enabled)
316 continue;
317 printf("\t%s\n", table->names[i].name);
318 }
319
320 if (table->next != NULL) {
321 recursive_event_list(table->next);
322 }
323 }
324
325 void
326 tprof_event_list(void)
327 {
328 recursive_event_list(cpuevents);
329 }
330
331 static void
332 recursive_event_lookup(struct event_table *table, const char *name,
333 struct tprof_param *param)
334 {
335 size_t i;
336
337 for (i = 0; i < table->nevents; i++) {
338 if (!table->names[i].enabled)
339 continue;
340 if (!strcmp(table->names[i].name, name)) {
341 param->p_event = table->names[i].event;
342 param->p_unit = table->names[i].unit;
343 return;
344 }
345 }
346
347 if (table->next != NULL) {
348 recursive_event_lookup(table->next, name, param);
349 } else {
350 errx(EXIT_FAILURE, "event '%s' unknown", name);
351 }
352 }
353
354 void
355 tprof_event_lookup(const char *name, struct tprof_param *param)
356 {
357 recursive_event_lookup(cpuevents, name, param);
358 }
359