tprof_x86.c revision 1.1 1 1.1 maxv /* $NetBSD: tprof_x86.c,v 1.1 2018/07/13 07:56:29 maxv Exp $ */
2 1.1 maxv
3 1.1 maxv /*
4 1.1 maxv * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 1.1 maxv * All rights reserved.
6 1.1 maxv *
7 1.1 maxv * This code is derived from software contributed to The NetBSD Foundation
8 1.1 maxv * by Maxime Villard.
9 1.1 maxv *
10 1.1 maxv * Redistribution and use in source and binary forms, with or without
11 1.1 maxv * modification, are permitted provided that the following conditions
12 1.1 maxv * are met:
13 1.1 maxv * 1. Redistributions of source code must retain the above copyright
14 1.1 maxv * notice, this list of conditions and the following disclaimer.
15 1.1 maxv * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 maxv * notice, this list of conditions and the following disclaimer in the
17 1.1 maxv * documentation and/or other materials provided with the distribution.
18 1.1 maxv *
19 1.1 maxv * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.1 maxv * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.1 maxv * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.1 maxv * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.1 maxv * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.1 maxv * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.1 maxv * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.1 maxv * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.1 maxv * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.1 maxv * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.1 maxv * POSSIBILITY OF SUCH DAMAGE.
30 1.1 maxv */
31 1.1 maxv
32 1.1 maxv #include <sys/cdefs.h>
33 1.1 maxv #include <stdio.h>
34 1.1 maxv #include <stdlib.h>
35 1.1 maxv #include <stdbool.h>
36 1.1 maxv #include <string.h>
37 1.1 maxv #include <unistd.h>
38 1.1 maxv #include <err.h>
39 1.1 maxv #include <machine/specialreg.h>
40 1.1 maxv #include <dev/tprof/tprof_ioctl.h>
41 1.1 maxv #include "../tprof.h"
42 1.1 maxv
/*
 * Prototypes for the three non-static functions defined at the end of
 * this file.
 */
int	tprof_event_init(uint32_t ident);
void	tprof_event_list(void);
void	tprof_event_lookup(const char *name, struct tprof_param *param);
46 1.1 maxv
/*
 * One named event.  The field order must not change: the event tables in
 * this file may initialize entries positionally.
 */
struct name_to_event {
	const char	*name;		/* compared with strcmp() during lookup */
	uint64_t	 event;		/* copied into tprof_param.p_event */
	uint64_t	 unit;		/* copied into tprof_param.p_unit */
	bool		 enabled;	/* false: skipped by listing and lookup */
};
53 1.1 maxv
/*
 * A named group of events.  Tables are chained through 'next'; a NULL
 * 'next' terminates the chain.
 */
struct event_table {
	const char		*tablename;	/* heading printed by the listing */
	struct name_to_event	*names;		/* array of 'nevents' entries */
	size_t			 nevents;	/* number of entries in 'names' */
	struct event_table	*next;		/* following table, or NULL */
};
60 1.1 maxv
/*
 * Head of the event-table chain selected by tprof_event_init().  It has
 * static storage duration, so it is NULL until that function assigns it.
 */
static struct event_table *cpuevents;
62 1.1 maxv
/*
 * Execute the CPUID instruction.
 *
 * On entry *eax and *ecx are loaded into the EAX and ECX registers (the
 * leaf and sub-leaf selectors); *ebx and *edx are not read.  On return
 * all four pointees hold the register values left by the instruction.
 *
 * The statement is spelled __asm__ rather than asm because the latter is
 * a GNU extension keyword that is not recognized in strict ISO modes
 * such as -std=c99 or -std=c11.
 */
static void
x86_cpuid(unsigned int *eax, unsigned int *ebx,
    unsigned int *ecx, unsigned int *edx)
{
	__asm__ volatile("cpuid"
	    : "=a" (*eax),
	      "=b" (*ebx),
	      "=c" (*ecx),
	      "=d" (*edx)
	    : "0" (*eax), "2" (*ecx));
}
73 1.1 maxv
74 1.1 maxv /* -------------------------------------------------------------------------- */
75 1.1 maxv
76 1.1 maxv /*
77 1.1 maxv * Intel Architectural Version 1.
78 1.1 maxv */
79 1.1 maxv static struct name_to_event intel_arch1_names[] = {
80 1.1 maxv /* Event Name - Event Select - UMask */
81 1.1 maxv { "unhalted-core-cycles", 0x3C, 0x00, true },
82 1.1 maxv { "instruction-retired", 0xC0, 0x00, true },
83 1.1 maxv { "unhalted-reference-cycles", 0x3C, 0x01, true },
84 1.1 maxv { "llc-reference", 0x2E, 0x4F, true },
85 1.1 maxv { "llc-misses", 0x2E, 0x41, true },
86 1.1 maxv { "branch-instruction-retired", 0xC4, 0x00, true },
87 1.1 maxv { "branch-misses-retired", 0xC5, 0x00, true },
88 1.1 maxv };
89 1.1 maxv
90 1.1 maxv static struct event_table intel_arch1 = {
91 1.1 maxv .tablename = "Intel Architectural Version 1",
92 1.1 maxv .names = intel_arch1_names,
93 1.1 maxv .nevents = sizeof(intel_arch1_names) /
94 1.1 maxv sizeof(struct name_to_event),
95 1.1 maxv .next = NULL
96 1.1 maxv };
97 1.1 maxv
98 1.1 maxv static struct event_table *
99 1.1 maxv init_intel_arch1(void)
100 1.1 maxv {
101 1.1 maxv unsigned int eax, ebx, ecx, edx;
102 1.1 maxv struct event_table *table;
103 1.1 maxv size_t i;
104 1.1 maxv
105 1.1 maxv eax = 0x0A;
106 1.1 maxv ebx = 0;
107 1.1 maxv ecx = 0;
108 1.1 maxv edx = 0;
109 1.1 maxv x86_cpuid(&eax, &ebx, &ecx, &edx);
110 1.1 maxv
111 1.1 maxv table = &intel_arch1;
112 1.1 maxv for (i = 0; i < table->nevents; i++) {
113 1.1 maxv /* Disable the unsupported events. */
114 1.1 maxv if ((ebx & (i << 1)) != 0)
115 1.1 maxv table->names[i].enabled = false;
116 1.1 maxv }
117 1.1 maxv
118 1.1 maxv return table;
119 1.1 maxv }
120 1.1 maxv
121 1.1 maxv /*
122 1.1 maxv * Intel Skylake/Kabylake. TODO: there are many more events available.
123 1.1 maxv */
124 1.1 maxv static struct name_to_event intel_skylake_kabylake_names[] = {
125 1.1 maxv /* Event Name - Event Select - UMask */
126 1.1 maxv { "itlb-misses-causes-a-walk", 0x85, 0x01, true },
127 1.1 maxv };
128 1.1 maxv
129 1.1 maxv static struct event_table intel_skylake_kabylake = {
130 1.1 maxv .tablename = "Intel Skylake/Kabylake",
131 1.1 maxv .names = intel_skylake_kabylake_names,
132 1.1 maxv .nevents = sizeof(intel_skylake_kabylake_names) /
133 1.1 maxv sizeof(struct name_to_event),
134 1.1 maxv .next = NULL
135 1.1 maxv };
136 1.1 maxv
137 1.1 maxv static struct event_table *
138 1.1 maxv init_intel_skylake_kabylake(void)
139 1.1 maxv {
140 1.1 maxv return &intel_skylake_kabylake;
141 1.1 maxv }
142 1.1 maxv
143 1.1 maxv static struct event_table *
144 1.1 maxv init_intel_generic(void)
145 1.1 maxv {
146 1.1 maxv unsigned int eax, ebx, ecx, edx;
147 1.1 maxv struct event_table *table;
148 1.1 maxv
149 1.1 maxv /*
150 1.1 maxv * The kernel made sure the Architectural Version 1 PMCs were
151 1.1 maxv * present.
152 1.1 maxv */
153 1.1 maxv table = init_intel_arch1();
154 1.1 maxv
155 1.1 maxv /*
156 1.1 maxv * Now query the additional (non-architectural) events. They
157 1.1 maxv * depend on the CPU model.
158 1.1 maxv */
159 1.1 maxv eax = 0x01;
160 1.1 maxv ebx = 0;
161 1.1 maxv ecx = 0;
162 1.1 maxv edx = 0;
163 1.1 maxv x86_cpuid(&eax, &ebx, &ecx, &edx);
164 1.1 maxv
165 1.1 maxv switch (CPUID_TO_MODEL(eax)) {
166 1.1 maxv case 0x4E: /* Skylake */
167 1.1 maxv case 0x5E: /* Skylake */
168 1.1 maxv case 0x8E: /* Kabylake */
169 1.1 maxv case 0x9E: /* Kabylake */
170 1.1 maxv table->next = init_intel_skylake_kabylake();
171 1.1 maxv break;
172 1.1 maxv }
173 1.1 maxv
174 1.1 maxv return table;
175 1.1 maxv }
176 1.1 maxv
177 1.1 maxv /* -------------------------------------------------------------------------- */
178 1.1 maxv
179 1.1 maxv /*
180 1.1 maxv * AMD Family 10h
181 1.1 maxv */
182 1.1 maxv static struct name_to_event amd_f10h_names[] = {
183 1.1 maxv { "seg-load-all", F10H_SEGMENT_REG_LOADS, 0x7f, true },
184 1.1 maxv { "seg-load-es", F10H_SEGMENT_REG_LOADS, 0x01, true },
185 1.1 maxv { "seg-load-cs", F10H_SEGMENT_REG_LOADS, 0x02, true },
186 1.1 maxv { "seg-load-ss", F10H_SEGMENT_REG_LOADS, 0x04, true },
187 1.1 maxv { "seg-load-ds", F10H_SEGMENT_REG_LOADS, 0x08, true },
188 1.1 maxv { "seg-load-fs", F10H_SEGMENT_REG_LOADS, 0x10, true },
189 1.1 maxv { "seg-load-gs", F10H_SEGMENT_REG_LOADS, 0x20, true },
190 1.1 maxv { "seg-load-hs", F10H_SEGMENT_REG_LOADS, 0x40, true },
191 1.1 maxv { "l1cache-access", F10H_DATA_CACHE_ACCESS, 0, true },
192 1.1 maxv { "l1cache-miss", F10H_DATA_CACHE_MISS, 0, true },
193 1.1 maxv { "l1cache-refill", F10H_DATA_CACHE_REFILL_FROM_L2, 0x1f, true },
194 1.1 maxv { "l1cache-refill-invalid", F10H_DATA_CACHE_REFILL_FROM_L2, 0x01, true },
195 1.1 maxv { "l1cache-refill-shared", F10H_DATA_CACHE_REFILL_FROM_L2, 0x02, true },
196 1.1 maxv { "l1cache-refill-exclusive", F10H_DATA_CACHE_REFILL_FROM_L2, 0x04, true },
197 1.1 maxv { "l1cache-refill-owner", F10H_DATA_CACHE_REFILL_FROM_L2, 0x08, true },
198 1.1 maxv { "l1cache-refill-modified", F10H_DATA_CACHE_REFILL_FROM_L2, 0x10, true },
199 1.1 maxv { "l1cache-load", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x1f, true },
200 1.1 maxv { "l1cache-load-invalid", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x01, true },
201 1.1 maxv { "l1cache-load-shared", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x02, true },
202 1.1 maxv { "l1cache-load-exclusive", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x04, true },
203 1.1 maxv { "l1cache-load-owner", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x08, true },
204 1.1 maxv { "l1cache-load-modified", F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x10, true },
205 1.1 maxv { "l1cache-writeback", F10H_CACHE_LINES_EVICTED, 0x1f, true },
206 1.1 maxv { "l1cache-writeback-invalid", F10H_CACHE_LINES_EVICTED, 0x01, true },
207 1.1 maxv { "l1cache-writeback-shared", F10H_CACHE_LINES_EVICTED, 0x02, true },
208 1.1 maxv { "l1cache-writeback-exclusive",F10H_CACHE_LINES_EVICTED, 0x04, true },
209 1.1 maxv { "l1cache-writeback-owner", F10H_CACHE_LINES_EVICTED, 0x08, true },
210 1.1 maxv { "l1cache-writeback-modified", F10H_CACHE_LINES_EVICTED, 0x10, true },
211 1.1 maxv { "l1DTLB-hit-all", F10H_L1_DTLB_HIT, 0x07, true },
212 1.1 maxv { "l1DTLB-hit-4Kpage", F10H_L1_DTLB_HIT, 0x01, true },
213 1.1 maxv { "l1DTLB-hit-2Mpage", F10H_L1_DTLB_HIT, 0x02, true },
214 1.1 maxv { "l1DTLB-hit-1Gpage", F10H_L1_DTLB_HIT, 0x04, true },
215 1.1 maxv { "l1DTLB-miss-all", F10H_L1_DTLB_MISS, 0x07, true },
216 1.1 maxv { "l1DTLB-miss-4Kpage", F10H_L1_DTLB_MISS, 0x01, true },
217 1.1 maxv { "l1DTLB-miss-2Mpage", F10H_L1_DTLB_MISS, 0x02, true },
218 1.1 maxv { "l1DTLB-miss-1Gpage", F10H_L1_DTLB_MISS, 0x04, true },
219 1.1 maxv { "l2DTLB-miss-all", F10H_L2_DTLB_MISS, 0x03, true },
220 1.1 maxv { "l2DTLB-miss-4Kpage", F10H_L2_DTLB_MISS, 0x01, true },
221 1.1 maxv { "l2DTLB-miss-2Mpage", F10H_L2_DTLB_MISS, 0x02, true },
222 1.1 maxv /* l2DTLB-miss-1Gpage: reserved on some revisions, so disabled */
223 1.1 maxv { "l1ITLB-miss", F10H_L1_ITLB_MISS, 0, true },
224 1.1 maxv { "l2ITLB-miss-all", F10H_L2_ITLB_MISS, 0x03, true },
225 1.1 maxv { "l2ITLB-miss-4Kpage", F10H_L2_ITLB_MISS, 0x01, true },
226 1.1 maxv { "l2ITLB-miss-2Mpage", F10H_L2_ITLB_MISS, 0x02, true },
227 1.1 maxv { "mem-misalign-ref", F10H_MISALIGNED_ACCESS, 0, true },
228 1.1 maxv { "ins-fetch", F10H_INSTRUCTION_CACHE_FETCH, 0, true },
229 1.1 maxv { "ins-fetch-miss", F10H_INSTRUCTION_CACHE_MISS, 0, true },
230 1.1 maxv { "ins-refill-l2", F10H_INSTRUCTION_CACHE_REFILL_FROM_L2, 0, true },
231 1.1 maxv { "ins-refill-sys", F10H_INSTRUCTION_CACHE_REFILL_FROM_SYS, 0, true },
232 1.1 maxv { "ins-fetch-stall", F10H_INSTRUCTION_FETCH_STALL, 0, true },
233 1.1 maxv { "ins-retired", F10H_RETIRED_INSTRUCTIONS, 0, true },
234 1.1 maxv { "ins-empty", F10H_DECODER_EMPTY, 0, true },
235 1.1 maxv { "ops-retired", F10H_RETIRED_UOPS, 0, true },
236 1.1 maxv { "branch-retired", F10H_RETIRED_BRANCH, 0, true },
237 1.1 maxv { "branch-miss-retired", F10H_RETIRED_MISPREDICTED_BRANCH,0, true },
238 1.1 maxv { "branch-taken-retired", F10H_RETIRED_TAKEN_BRANCH, 0, true },
239 1.1 maxv { "branch-taken-miss-retired", F10H_RETIRED_TAKEN_BRANCH_MISPREDICTED, 0, true },
240 1.1 maxv { "branch-far-retired", F10H_RETIRED_FAR_CONTROL_TRANSFER, 0, true },
241 1.1 maxv { "branch-resync-retired", F10H_RETIRED_BRANCH_RESYNC, 0, true },
242 1.1 maxv { "branch-near-retired", F10H_RETIRED_NEAR_RETURNS, 0, true },
243 1.1 maxv { "branch-near-miss-retired", F10H_RETIRED_NEAR_RETURNS_MISPREDICTED, 0, true },
244 1.1 maxv { "branch-indirect-miss-retired", F10H_RETIRED_INDIRECT_BRANCH_MISPREDICTED, 0, true },
245 1.1 maxv { "int-hw", F10H_INTERRUPTS_TAKEN, 0, true },
246 1.1 maxv { "int-cycles-masked", F10H_INTERRUPTS_MASKED_CYCLES, 0, true },
247 1.1 maxv { "int-cycles-masked-pending",
248 1.1 maxv F10H_INTERRUPTS_MASKED_CYCLES_INTERRUPT_PENDING, 0, true },
249 1.1 maxv { "fpu-exceptions", F10H_FPU_EXCEPTIONS, 0, true },
250 1.1 maxv { "break-match0", F10H_DR0_BREAKPOINT_MATCHES, 0, true },
251 1.1 maxv { "break-match1", F10H_DR1_BREAKPOINT_MATCHES, 0, true },
252 1.1 maxv { "break-match2", F10H_DR2_BREAKPOINT_MATCHES, 0, true },
253 1.1 maxv { "break-match3", F10H_DR3_BREAKPOINT_MATCHES, 0, true },
254 1.1 maxv };
255 1.1 maxv
256 1.1 maxv static struct event_table amd_f10h = {
257 1.1 maxv .tablename = "AMD Family 10h",
258 1.1 maxv .names = amd_f10h_names,
259 1.1 maxv .nevents = sizeof(amd_f10h_names) /
260 1.1 maxv sizeof(struct name_to_event),
261 1.1 maxv .next = NULL
262 1.1 maxv };
263 1.1 maxv
264 1.1 maxv static struct event_table *
265 1.1 maxv init_amd_f10h(void)
266 1.1 maxv {
267 1.1 maxv return &amd_f10h;
268 1.1 maxv }
269 1.1 maxv
/*
 * Select the event table for an AMD CPU from the family reported by
 * CPUID leaf 1.
 *
 * Returns the Family 10h table for family 0x10, and NULL for every
 * other family.
 */
static struct event_table *
init_amd_generic(void)
{
	unsigned int eax, ebx, ecx, edx;

	eax = 0x01;
	ebx = ecx = edx = 0;
	x86_cpuid(&eax, &ebx, &ecx, &edx);

	if (CPUID_TO_FAMILY(eax) == 0x10)
		return init_amd_f10h();

	return NULL;
}
288 1.1 maxv
289 1.1 maxv /* -------------------------------------------------------------------------- */
290 1.1 maxv
291 1.1 maxv int
292 1.1 maxv tprof_event_init(uint32_t ident)
293 1.1 maxv {
294 1.1 maxv switch (ident) {
295 1.1 maxv case TPROF_IDENT_NONE:
296 1.1 maxv return -1;
297 1.1 maxv case TPROF_IDENT_INTEL_GENERIC:
298 1.1 maxv cpuevents = init_intel_generic();
299 1.1 maxv break;
300 1.1 maxv case TPROF_IDENT_AMD_GENERIC:
301 1.1 maxv cpuevents = init_amd_generic();
302 1.1 maxv break;
303 1.1 maxv }
304 1.1 maxv return (cpuevents == NULL) ? -1 : 0;
305 1.1 maxv }
306 1.1 maxv
307 1.1 maxv static void
308 1.1 maxv recursive_event_list(struct event_table *table)
309 1.1 maxv {
310 1.1 maxv size_t i;
311 1.1 maxv
312 1.1 maxv printf("%s:\n", table->tablename);
313 1.1 maxv for (i = 0; i < table->nevents; i++) {
314 1.1 maxv if (!table->names[i].enabled)
315 1.1 maxv continue;
316 1.1 maxv printf("\t%s\n", table->names[i].name);
317 1.1 maxv }
318 1.1 maxv printf("\n");
319 1.1 maxv
320 1.1 maxv if (table->next != NULL) {
321 1.1 maxv recursive_event_list(table->next);
322 1.1 maxv }
323 1.1 maxv }
324 1.1 maxv
325 1.1 maxv void
326 1.1 maxv tprof_event_list(void)
327 1.1 maxv {
328 1.1 maxv recursive_event_list(cpuevents);
329 1.1 maxv }
330 1.1 maxv
331 1.1 maxv static void
332 1.1 maxv recursive_event_lookup(struct event_table *table, const char *name,
333 1.1 maxv struct tprof_param *param)
334 1.1 maxv {
335 1.1 maxv size_t i;
336 1.1 maxv
337 1.1 maxv for (i = 0; i < table->nevents; i++) {
338 1.1 maxv if (!table->names[i].enabled)
339 1.1 maxv continue;
340 1.1 maxv if (!strcmp(table->names[i].name, name)) {
341 1.1 maxv param->p_event = table->names[i].event;
342 1.1 maxv param->p_unit = table->names[i].unit;
343 1.1 maxv return;
344 1.1 maxv }
345 1.1 maxv }
346 1.1 maxv
347 1.1 maxv if (table->next != NULL) {
348 1.1 maxv recursive_event_lookup(table->next, name, param);
349 1.1 maxv } else {
350 1.1 maxv errx(EXIT_FAILURE, "event '%s' unknown", name);
351 1.1 maxv }
352 1.1 maxv }
353 1.1 maxv
354 1.1 maxv void
355 1.1 maxv tprof_event_lookup(const char *name, struct tprof_param *param)
356 1.1 maxv {
357 1.1 maxv recursive_event_lookup(cpuevents, name, param);
358 1.1 maxv }
359