tprof_x86.c revision 1.1 1 /* $NetBSD: tprof_x86.c,v 1.1 2018/07/13 07:56:29 maxv Exp $ */
2
3 /*
4 * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <stdbool.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <err.h>
39 #include <machine/specialreg.h>
40 #include <dev/tprof/tprof_ioctl.h>
41 #include "../tprof.h"
42
/*
 * Entry points implemented by this file.
 *
 * tprof_event_init:   select the event tables matching the given CPU
 *                     identifier; returns 0 on success and -1 when no
 *                     table is available.
 * tprof_event_list:   print the enabled event names of every selected
 *                     table to stdout.
 * tprof_event_lookup: translate an event name into the event/unit pair
 *                     stored in the given tprof_param; exits through
 *                     errx() when the name is not found.
 */
int tprof_event_init(uint32_t);
void tprof_event_list(void);
void tprof_event_lookup(const char *, struct tprof_param *);
46
/*
 * One named performance-counter event.
 *
 * The tables in this file initialize this structure positionally, so the
 * order of the members must not change.
 */
struct name_to_event {
	const char *name;	/* name matched by the lookup and shown by the list */
	uint64_t event;		/* value copied into tprof_param.p_event */
	uint64_t unit;		/* value copied into tprof_param.p_unit */
	bool enabled;		/* false: skipped by both the list and the lookup */
};
53
/*
 * A named group of events. Tables are chained through 'next' to form a
 * singly-linked list; a NULL 'next' marks the last table.
 */
struct event_table {
	const char *tablename;		/* heading printed before the events */
	struct name_to_event *names;	/* array of 'nevents' entries */
	size_t nevents;			/* number of entries in 'names' */
	struct event_table *next;	/* following table, or NULL */
};
60
/*
 * Head of the table list for the running CPU. Assigned by
 * tprof_event_init() and read by tprof_event_list() and
 * tprof_event_lookup(); stays NULL until an initialization succeeds.
 */
static struct event_table *cpuevents = NULL;
62
/*
 * Execute the CPUID instruction.
 *
 * On entry *eax and *ecx are loaded into EAX and ECX as inputs (the "0"
 * and "2" constraints tie them to output operands 0 and 2). On return all
 * four pointed-to values are overwritten with the contents of EAX, EBX,
 * ECX and EDX. The incoming values of *ebx and *edx are not passed to the
 * instruction.
 */
static void x86_cpuid(unsigned int *eax, unsigned int *ebx,
    unsigned int *ecx, unsigned int *edx)
{
	asm volatile("cpuid"
	    : "=a" (*eax),
	      "=b" (*ebx),
	      "=c" (*ecx),
	      "=d" (*edx)
	    : "0" (*eax), "2" (*ecx));
}
73
74 /* -------------------------------------------------------------------------- */
75
76 /*
77 * Intel Architectural Version 1.
78 */
79 static struct name_to_event intel_arch1_names[] = {
80 /* Event Name - Event Select - UMask */
81 { "unhalted-core-cycles", 0x3C, 0x00, true },
82 { "instruction-retired", 0xC0, 0x00, true },
83 { "unhalted-reference-cycles", 0x3C, 0x01, true },
84 { "llc-reference", 0x2E, 0x4F, true },
85 { "llc-misses", 0x2E, 0x41, true },
86 { "branch-instruction-retired", 0xC4, 0x00, true },
87 { "branch-misses-retired", 0xC5, 0x00, true },
88 };
89
90 static struct event_table intel_arch1 = {
91 .tablename = "Intel Architectural Version 1",
92 .names = intel_arch1_names,
93 .nevents = sizeof(intel_arch1_names) /
94 sizeof(struct name_to_event),
95 .next = NULL
96 };
97
98 static struct event_table *
99 init_intel_arch1(void)
100 {
101 unsigned int eax, ebx, ecx, edx;
102 struct event_table *table;
103 size_t i;
104
105 eax = 0x0A;
106 ebx = 0;
107 ecx = 0;
108 edx = 0;
109 x86_cpuid(&eax, &ebx, &ecx, &edx);
110
111 table = &intel_arch1;
112 for (i = 0; i < table->nevents; i++) {
113 /* Disable the unsupported events. */
114 if ((ebx & (i << 1)) != 0)
115 table->names[i].enabled = false;
116 }
117
118 return table;
119 }
120
121 /*
122 * Intel Skylake/Kabylake. TODO: there are many more events available.
123 */
124 static struct name_to_event intel_skylake_kabylake_names[] = {
125 /* Event Name - Event Select - UMask */
126 { "itlb-misses-causes-a-walk", 0x85, 0x01, true },
127 };
128
129 static struct event_table intel_skylake_kabylake = {
130 .tablename = "Intel Skylake/Kabylake",
131 .names = intel_skylake_kabylake_names,
132 .nevents = sizeof(intel_skylake_kabylake_names) /
133 sizeof(struct name_to_event),
134 .next = NULL
135 };
136
137 static struct event_table *
138 init_intel_skylake_kabylake(void)
139 {
140 return &intel_skylake_kabylake;
141 }
142
143 static struct event_table *
144 init_intel_generic(void)
145 {
146 unsigned int eax, ebx, ecx, edx;
147 struct event_table *table;
148
149 /*
150 * The kernel made sure the Architectural Version 1 PMCs were
151 * present.
152 */
153 table = init_intel_arch1();
154
155 /*
156 * Now query the additional (non-architectural) events. They
157 * depend on the CPU model.
158 */
159 eax = 0x01;
160 ebx = 0;
161 ecx = 0;
162 edx = 0;
163 x86_cpuid(&eax, &ebx, &ecx, &edx);
164
165 switch (CPUID_TO_MODEL(eax)) {
166 case 0x4E: /* Skylake */
167 case 0x5E: /* Skylake */
168 case 0x8E: /* Kabylake */
169 case 0x9E: /* Kabylake */
170 table->next = init_intel_skylake_kabylake();
171 break;
172 }
173
174 return table;
175 }
176
177 /* -------------------------------------------------------------------------- */
178
/*
 * AMD Family 10h events.
 *
 * Each entry is { name, event, unit, enabled }: the second column is an
 * F10H_* event constant (presumably provided by <machine/specialreg.h> --
 * confirm) and the third column is the unit value handed back together
 * with it by the lookup. Names ending in "-all", and the unsuffixed
 * "l1cache-refill", "l1cache-load" and "l1cache-writeback" entries, carry
 * the bitwise OR of the unit values of the more specific entries that
 * follow them.
 */
static struct name_to_event amd_f10h_names[] = {
	{ "seg-load-all",		F10H_SEGMENT_REG_LOADS,		0x7f, true },
	{ "seg-load-es",		F10H_SEGMENT_REG_LOADS,		0x01, true },
	{ "seg-load-cs",		F10H_SEGMENT_REG_LOADS,		0x02, true },
	{ "seg-load-ss",		F10H_SEGMENT_REG_LOADS,		0x04, true },
	{ "seg-load-ds",		F10H_SEGMENT_REG_LOADS,		0x08, true },
	{ "seg-load-fs",		F10H_SEGMENT_REG_LOADS,		0x10, true },
	{ "seg-load-gs",		F10H_SEGMENT_REG_LOADS,		0x20, true },
	{ "seg-load-hs",		F10H_SEGMENT_REG_LOADS,		0x40, true },
	{ "l1cache-access",		F10H_DATA_CACHE_ACCESS,		0, true },
	{ "l1cache-miss",		F10H_DATA_CACHE_MISS,		0, true },
	{ "l1cache-refill",		F10H_DATA_CACHE_REFILL_FROM_L2,	0x1f, true },
	{ "l1cache-refill-invalid",	F10H_DATA_CACHE_REFILL_FROM_L2,	0x01, true },
	{ "l1cache-refill-shared",	F10H_DATA_CACHE_REFILL_FROM_L2,	0x02, true },
	{ "l1cache-refill-exclusive",	F10H_DATA_CACHE_REFILL_FROM_L2,	0x04, true },
	{ "l1cache-refill-owner",	F10H_DATA_CACHE_REFILL_FROM_L2,	0x08, true },
	{ "l1cache-refill-modified",	F10H_DATA_CACHE_REFILL_FROM_L2,	0x10, true },
	{ "l1cache-load",		F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x1f, true },
	{ "l1cache-load-invalid",	F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x01, true },
	{ "l1cache-load-shared",	F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x02, true },
	{ "l1cache-load-exclusive",	F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x04, true },
	{ "l1cache-load-owner",		F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x08, true },
	{ "l1cache-load-modified",	F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x10, true },
	{ "l1cache-writeback",		F10H_CACHE_LINES_EVICTED,	0x1f, true },
	{ "l1cache-writeback-invalid",	F10H_CACHE_LINES_EVICTED,	0x01, true },
	{ "l1cache-writeback-shared",	F10H_CACHE_LINES_EVICTED,	0x02, true },
	{ "l1cache-writeback-exclusive",F10H_CACHE_LINES_EVICTED,	0x04, true },
	{ "l1cache-writeback-owner",	F10H_CACHE_LINES_EVICTED,	0x08, true },
	{ "l1cache-writeback-modified",	F10H_CACHE_LINES_EVICTED,	0x10, true },
	{ "l1DTLB-hit-all",		F10H_L1_DTLB_HIT,		0x07, true },
	{ "l1DTLB-hit-4Kpage",		F10H_L1_DTLB_HIT,		0x01, true },
	{ "l1DTLB-hit-2Mpage",		F10H_L1_DTLB_HIT,		0x02, true },
	{ "l1DTLB-hit-1Gpage",		F10H_L1_DTLB_HIT,		0x04, true },
	{ "l1DTLB-miss-all",		F10H_L1_DTLB_MISS,		0x07, true },
	{ "l1DTLB-miss-4Kpage",		F10H_L1_DTLB_MISS,		0x01, true },
	{ "l1DTLB-miss-2Mpage",		F10H_L1_DTLB_MISS,		0x02, true },
	{ "l1DTLB-miss-1Gpage",		F10H_L1_DTLB_MISS,		0x04, true },
	{ "l2DTLB-miss-all",		F10H_L2_DTLB_MISS,		0x03, true },
	{ "l2DTLB-miss-4Kpage",		F10H_L2_DTLB_MISS,		0x01, true },
	{ "l2DTLB-miss-2Mpage",		F10H_L2_DTLB_MISS,		0x02, true },
	/* l2DTLB-miss-1Gpage: reserved on some revisions, so disabled */
	{ "l1ITLB-miss",		F10H_L1_ITLB_MISS,		0, true },
	{ "l2ITLB-miss-all",		F10H_L2_ITLB_MISS,		0x03, true },
	{ "l2ITLB-miss-4Kpage",		F10H_L2_ITLB_MISS,		0x01, true },
	{ "l2ITLB-miss-2Mpage",		F10H_L2_ITLB_MISS,		0x02, true },
	{ "mem-misalign-ref",		F10H_MISALIGNED_ACCESS,		0, true },
	{ "ins-fetch",			F10H_INSTRUCTION_CACHE_FETCH,	0, true },
	{ "ins-fetch-miss",		F10H_INSTRUCTION_CACHE_MISS,	0, true },
	{ "ins-refill-l2",		F10H_INSTRUCTION_CACHE_REFILL_FROM_L2,	0, true },
	{ "ins-refill-sys",		F10H_INSTRUCTION_CACHE_REFILL_FROM_SYS,	0, true },
	{ "ins-fetch-stall",		F10H_INSTRUCTION_FETCH_STALL,	0, true },
	{ "ins-retired",		F10H_RETIRED_INSTRUCTIONS,	0, true },
	{ "ins-empty",			F10H_DECODER_EMPTY,		0, true },
	{ "ops-retired",		F10H_RETIRED_UOPS,		0, true },
	{ "branch-retired",		F10H_RETIRED_BRANCH,		0, true },
	{ "branch-miss-retired",	F10H_RETIRED_MISPREDICTED_BRANCH,0, true },
	{ "branch-taken-retired",	F10H_RETIRED_TAKEN_BRANCH,	0, true },
	{ "branch-taken-miss-retired",	F10H_RETIRED_TAKEN_BRANCH_MISPREDICTED,	0, true },
	{ "branch-far-retired",		F10H_RETIRED_FAR_CONTROL_TRANSFER,	0, true },
	{ "branch-resync-retired",	F10H_RETIRED_BRANCH_RESYNC,	0, true },
	{ "branch-near-retired",	F10H_RETIRED_NEAR_RETURNS,	0, true },
	{ "branch-near-miss-retired",	F10H_RETIRED_NEAR_RETURNS_MISPREDICTED,	0, true },
	{ "branch-indirect-miss-retired", F10H_RETIRED_INDIRECT_BRANCH_MISPREDICTED, 0, true },
	{ "int-hw",			F10H_INTERRUPTS_TAKEN,		0, true },
	{ "int-cycles-masked",		F10H_INTERRUPTS_MASKED_CYCLES,	0, true },
	{ "int-cycles-masked-pending",
	    F10H_INTERRUPTS_MASKED_CYCLES_INTERRUPT_PENDING, 0, true },
	{ "fpu-exceptions",		F10H_FPU_EXCEPTIONS,		0, true },
	{ "break-match0",		F10H_DR0_BREAKPOINT_MATCHES,	0, true },
	{ "break-match1",		F10H_DR1_BREAKPOINT_MATCHES,	0, true },
	{ "break-match2",		F10H_DR2_BREAKPOINT_MATCHES,	0, true },
	{ "break-match3",		F10H_DR3_BREAKPOINT_MATCHES,	0, true },
};
255
256 static struct event_table amd_f10h = {
257 .tablename = "AMD Family 10h",
258 .names = amd_f10h_names,
259 .nevents = sizeof(amd_f10h_names) /
260 sizeof(struct name_to_event),
261 .next = NULL
262 };
263
264 static struct event_table *
265 init_amd_f10h(void)
266 {
267 return &amd_f10h;
268 }
269
/*
 * Pick the event table for an AMD CPU from the family reported by CPUID
 * leaf 0x01.
 *
 * Returns the table for family 0x10, or NULL for every other family.
 */
static struct event_table *
init_amd_generic(void)
{
	unsigned int eax = 0x01, ebx = 0, ecx = 0, edx = 0;

	x86_cpuid(&eax, &ebx, &ecx, &edx);

	if (CPUID_TO_FAMILY(eax) == 0x10) {
		return init_amd_f10h();
	}

	return NULL;
}
288
289 /* -------------------------------------------------------------------------- */
290
291 int
292 tprof_event_init(uint32_t ident)
293 {
294 switch (ident) {
295 case TPROF_IDENT_NONE:
296 return -1;
297 case TPROF_IDENT_INTEL_GENERIC:
298 cpuevents = init_intel_generic();
299 break;
300 case TPROF_IDENT_AMD_GENERIC:
301 cpuevents = init_amd_generic();
302 break;
303 }
304 return (cpuevents == NULL) ? -1 : 0;
305 }
306
307 static void
308 recursive_event_list(struct event_table *table)
309 {
310 size_t i;
311
312 printf("%s:\n", table->tablename);
313 for (i = 0; i < table->nevents; i++) {
314 if (!table->names[i].enabled)
315 continue;
316 printf("\t%s\n", table->names[i].name);
317 }
318 printf("\n");
319
320 if (table->next != NULL) {
321 recursive_event_list(table->next);
322 }
323 }
324
325 void
326 tprof_event_list(void)
327 {
328 recursive_event_list(cpuevents);
329 }
330
331 static void
332 recursive_event_lookup(struct event_table *table, const char *name,
333 struct tprof_param *param)
334 {
335 size_t i;
336
337 for (i = 0; i < table->nevents; i++) {
338 if (!table->names[i].enabled)
339 continue;
340 if (!strcmp(table->names[i].name, name)) {
341 param->p_event = table->names[i].event;
342 param->p_unit = table->names[i].unit;
343 return;
344 }
345 }
346
347 if (table->next != NULL) {
348 recursive_event_lookup(table->next, name, param);
349 } else {
350 errx(EXIT_FAILURE, "event '%s' unknown", name);
351 }
352 }
353
354 void
355 tprof_event_lookup(const char *name, struct tprof_param *param)
356 {
357 recursive_event_lookup(cpuevents, name, param);
358 }
359