Home | History | Annotate | Line # | Download | only in arch
tprof_x86.c revision 1.1
      1 /*	$NetBSD: tprof_x86.c,v 1.1 2018/07/13 07:56:29 maxv Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2018 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Maxime Villard.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 #include <stdio.h>
     34 #include <stdlib.h>
     35 #include <stdbool.h>
     36 #include <string.h>
     37 #include <unistd.h>
     38 #include <err.h>
     39 #include <machine/specialreg.h>
     40 #include <dev/tprof/tprof_ioctl.h>
     41 #include "../tprof.h"
     42 
/*
 * Entry points implemented in this file.
 */
int	tprof_event_init(uint32_t ident);
void	tprof_event_list(void);
void	tprof_event_lookup(const char *name, struct tprof_param *param);
     46 
/*
 * One selectable performance event.
 *
 * The tables in this file use positional initializers, so the order of
 * the members below must not change.
 */
struct name_to_event {
	const char *name;	/* name matched by tprof_event_lookup() */
	uint64_t event;		/* value copied into tprof_param.p_event */
	uint64_t unit;		/* value copied into tprof_param.p_unit */
	bool enabled;		/* false: hidden from listing and lookup */
};
     53 
/*
 * A named group of events. Tables are chained through 'next' to form a
 * singly linked list terminated by NULL.
 */
struct event_table {
	const char *tablename;		/* heading printed when listing */
	struct name_to_event *names;	/* array of 'nevents' entries */
	size_t nevents;			/* number of entries in 'names' */
	struct event_table *next;	/* following table, or NULL */
};
     60 
/*
 * Head of the event table list selected by tprof_event_init(). Objects
 * with static storage duration start out zeroed, so this is NULL until a
 * table has been selected.
 */
static struct event_table *cpuevents;
     62 
/*
 * Execute the CPUID instruction.
 *
 * On entry *eax and *ecx are loaded into the corresponding registers (the
 * leaf and sub-leaf selectors); *ebx and *edx are not read. On return the
 * four locations hold the EAX, EBX, ECX and EDX values produced by the
 * instruction.
 */
static void
x86_cpuid(unsigned int *eax, unsigned int *ebx,
    unsigned int *ecx, unsigned int *edx)
{
	/*
	 * Spelled __asm__/__volatile__ rather than asm/volatile so that the
	 * statement is also accepted when the compiler runs in a strict ISO
	 * mode, where the plain 'asm' keyword is not available.
	 */
	__asm__ __volatile__("cpuid"
	    : "=a" (*eax),
	      "=b" (*ebx),
	      "=c" (*ecx),
	      "=d" (*edx)
	    : "0" (*eax), "2" (*ecx));
}
     73 
     74 /* -------------------------------------------------------------------------- */
     75 
     76 /*
     77  * Intel Architectural Version 1.
     78  */
     79 static struct name_to_event intel_arch1_names[] = {
     80 	/* Event Name - Event Select - UMask */
     81 	{ "unhalted-core-cycles",	0x3C, 0x00, true },
     82 	{ "instruction-retired",	0xC0, 0x00, true },
     83 	{ "unhalted-reference-cycles",	0x3C, 0x01, true },
     84 	{ "llc-reference",		0x2E, 0x4F, true },
     85 	{ "llc-misses",			0x2E, 0x41, true },
     86 	{ "branch-instruction-retired",	0xC4, 0x00, true },
     87 	{ "branch-misses-retired",	0xC5, 0x00, true },
     88 };
     89 
     90 static struct event_table intel_arch1 = {
     91 	.tablename = "Intel Architectural Version 1",
     92 	.names = intel_arch1_names,
     93 	.nevents = sizeof(intel_arch1_names) /
     94 	    sizeof(struct name_to_event),
     95 	.next = NULL
     96 };
     97 
     98 static struct event_table *
     99 init_intel_arch1(void)
    100 {
    101 	unsigned int eax, ebx, ecx, edx;
    102 	struct event_table *table;
    103 	size_t i;
    104 
    105 	eax = 0x0A;
    106 	ebx = 0;
    107 	ecx = 0;
    108 	edx = 0;
    109 	x86_cpuid(&eax, &ebx, &ecx, &edx);
    110 
    111 	table = &intel_arch1;
    112 	for (i = 0; i < table->nevents; i++) {
    113 		/* Disable the unsupported events. */
    114 		if ((ebx & (i << 1)) != 0)
    115 			table->names[i].enabled = false;
    116 	}
    117 
    118 	return table;
    119 }
    120 
    121 /*
    122  * Intel Skylake/Kabylake. TODO: there are many more events available.
    123  */
    124 static struct name_to_event intel_skylake_kabylake_names[] = {
    125 	/* Event Name - Event Select - UMask */
    126 	{ "itlb-misses-causes-a-walk",	0x85, 0x01, true },
    127 };
    128 
    129 static struct event_table intel_skylake_kabylake = {
    130 	.tablename = "Intel Skylake/Kabylake",
    131 	.names = intel_skylake_kabylake_names,
    132 	.nevents = sizeof(intel_skylake_kabylake_names) /
    133 	    sizeof(struct name_to_event),
    134 	.next = NULL
    135 };
    136 
    137 static struct event_table *
    138 init_intel_skylake_kabylake(void)
    139 {
    140 	return &intel_skylake_kabylake;
    141 }
    142 
    143 static struct event_table *
    144 init_intel_generic(void)
    145 {
    146 	unsigned int eax, ebx, ecx, edx;
    147 	struct event_table *table;
    148 
    149 	/*
    150 	 * The kernel made sure the Architectural Version 1 PMCs were
    151 	 * present.
    152 	 */
    153 	table = init_intel_arch1();
    154 
    155 	/*
    156 	 * Now query the additional (non-architectural) events. They
    157 	 * depend on the CPU model.
    158 	 */
    159 	eax = 0x01;
    160 	ebx = 0;
    161 	ecx = 0;
    162 	edx = 0;
    163 	x86_cpuid(&eax, &ebx, &ecx, &edx);
    164 
    165 	switch (CPUID_TO_MODEL(eax)) {
    166 	case 0x4E: /* Skylake */
    167 	case 0x5E: /* Skylake */
    168 	case 0x8E: /* Kabylake */
    169 	case 0x9E: /* Kabylake */
    170 		table->next = init_intel_skylake_kabylake();
    171 		break;
    172 	}
    173 
    174 	return table;
    175 }
    176 
    177 /* -------------------------------------------------------------------------- */
    178 
    179 /*
    180  * AMD Family 10h
    181  */
    182 static struct name_to_event amd_f10h_names[] = {
    183 	{ "seg-load-all",		F10H_SEGMENT_REG_LOADS,		0x7f, true },
    184 	{ "seg-load-es",		F10H_SEGMENT_REG_LOADS,		0x01, true },
    185 	{ "seg-load-cs",		F10H_SEGMENT_REG_LOADS,		0x02, true },
    186 	{ "seg-load-ss",		F10H_SEGMENT_REG_LOADS,		0x04, true },
    187 	{ "seg-load-ds",		F10H_SEGMENT_REG_LOADS,		0x08, true },
    188 	{ "seg-load-fs",		F10H_SEGMENT_REG_LOADS,		0x10, true },
    189 	{ "seg-load-gs",		F10H_SEGMENT_REG_LOADS,		0x20, true },
    190 	{ "seg-load-hs",		F10H_SEGMENT_REG_LOADS,		0x40, true },
    191 	{ "l1cache-access",		F10H_DATA_CACHE_ACCESS,		0, true },
    192 	{ "l1cache-miss",		F10H_DATA_CACHE_MISS,		0, true },
    193 	{ "l1cache-refill",		F10H_DATA_CACHE_REFILL_FROM_L2,	0x1f, true },
    194 	{ "l1cache-refill-invalid",	F10H_DATA_CACHE_REFILL_FROM_L2,	0x01, true },
    195 	{ "l1cache-refill-shared",	F10H_DATA_CACHE_REFILL_FROM_L2,	0x02, true },
    196 	{ "l1cache-refill-exclusive",	F10H_DATA_CACHE_REFILL_FROM_L2,	0x04, true },
    197 	{ "l1cache-refill-owner",	F10H_DATA_CACHE_REFILL_FROM_L2,	0x08, true },
    198 	{ "l1cache-refill-modified",	F10H_DATA_CACHE_REFILL_FROM_L2,	0x10, true },
    199 	{ "l1cache-load",		F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x1f, true },
    200 	{ "l1cache-load-invalid",	F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x01, true },
    201 	{ "l1cache-load-shared",	F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x02, true },
    202 	{ "l1cache-load-exclusive",	F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x04, true },
    203 	{ "l1cache-load-owner",		F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x08, true },
    204 	{ "l1cache-load-modified",	F10H_DATA_CACHE_REFILL_FROM_NORTHBRIDGE,0x10, true },
    205 	{ "l1cache-writeback",		F10H_CACHE_LINES_EVICTED,	0x1f, true },
    206 	{ "l1cache-writeback-invalid",	F10H_CACHE_LINES_EVICTED,	0x01, true },
    207 	{ "l1cache-writeback-shared",	F10H_CACHE_LINES_EVICTED,	0x02, true },
    208 	{ "l1cache-writeback-exclusive",F10H_CACHE_LINES_EVICTED,	0x04, true },
    209 	{ "l1cache-writeback-owner",	F10H_CACHE_LINES_EVICTED,	0x08, true },
    210 	{ "l1cache-writeback-modified",	F10H_CACHE_LINES_EVICTED,	0x10, true },
    211 	{ "l1DTLB-hit-all",		F10H_L1_DTLB_HIT,		0x07, true },
    212 	{ "l1DTLB-hit-4Kpage",		F10H_L1_DTLB_HIT,		0x01, true },
    213 	{ "l1DTLB-hit-2Mpage",		F10H_L1_DTLB_HIT,		0x02, true },
    214 	{ "l1DTLB-hit-1Gpage",		F10H_L1_DTLB_HIT,		0x04, true },
    215 	{ "l1DTLB-miss-all",		F10H_L1_DTLB_MISS,		0x07, true },
    216 	{ "l1DTLB-miss-4Kpage",		F10H_L1_DTLB_MISS,		0x01, true },
    217 	{ "l1DTLB-miss-2Mpage",		F10H_L1_DTLB_MISS,		0x02, true },
    218 	{ "l1DTLB-miss-1Gpage",		F10H_L1_DTLB_MISS,		0x04, true },
    219 	{ "l2DTLB-miss-all",		F10H_L2_DTLB_MISS,		0x03, true },
    220 	{ "l2DTLB-miss-4Kpage",		F10H_L2_DTLB_MISS,		0x01, true },
    221 	{ "l2DTLB-miss-2Mpage",		F10H_L2_DTLB_MISS,		0x02, true },
    222 	/* l2DTLB-miss-1Gpage: reserved on some revisions, so disabled */
    223 	{ "l1ITLB-miss",		F10H_L1_ITLB_MISS,		0, true },
    224 	{ "l2ITLB-miss-all",		F10H_L2_ITLB_MISS,		0x03, true },
    225 	{ "l2ITLB-miss-4Kpage",		F10H_L2_ITLB_MISS,		0x01, true },
    226 	{ "l2ITLB-miss-2Mpage",		F10H_L2_ITLB_MISS,		0x02, true },
    227 	{ "mem-misalign-ref",		F10H_MISALIGNED_ACCESS,		0, true },
    228 	{ "ins-fetch",			F10H_INSTRUCTION_CACHE_FETCH,	0, true },
    229 	{ "ins-fetch-miss",		F10H_INSTRUCTION_CACHE_MISS,	0, true },
    230 	{ "ins-refill-l2",		F10H_INSTRUCTION_CACHE_REFILL_FROM_L2,	0, true },
    231 	{ "ins-refill-sys",		F10H_INSTRUCTION_CACHE_REFILL_FROM_SYS,	0, true },
    232 	{ "ins-fetch-stall",		F10H_INSTRUCTION_FETCH_STALL,	0, true },
    233 	{ "ins-retired",		F10H_RETIRED_INSTRUCTIONS,	0, true },
    234 	{ "ins-empty",			F10H_DECODER_EMPTY,	0, true },
    235 	{ "ops-retired",		F10H_RETIRED_UOPS,		0, true },
    236 	{ "branch-retired",		F10H_RETIRED_BRANCH,		0, true },
    237 	{ "branch-miss-retired",	F10H_RETIRED_MISPREDICTED_BRANCH,0, true },
    238 	{ "branch-taken-retired",	F10H_RETIRED_TAKEN_BRANCH,	0, true },
    239 	{ "branch-taken-miss-retired",	F10H_RETIRED_TAKEN_BRANCH_MISPREDICTED,	0, true },
    240 	{ "branch-far-retired", 	F10H_RETIRED_FAR_CONTROL_TRANSFER, 0, true },
    241 	{ "branch-resync-retired",	F10H_RETIRED_BRANCH_RESYNC,	0, true },
    242 	{ "branch-near-retired",	F10H_RETIRED_NEAR_RETURNS,	0, true },
    243 	{ "branch-near-miss-retired",	F10H_RETIRED_NEAR_RETURNS_MISPREDICTED,	0, true },
    244 	{ "branch-indirect-miss-retired", F10H_RETIRED_INDIRECT_BRANCH_MISPREDICTED,	0, true },
    245 	{ "int-hw",			F10H_INTERRUPTS_TAKEN,		0, true },
    246 	{ "int-cycles-masked",		F10H_INTERRUPTS_MASKED_CYCLES,	0, true },
    247 	{ "int-cycles-masked-pending",
    248 	    F10H_INTERRUPTS_MASKED_CYCLES_INTERRUPT_PENDING, 0, true },
    249 	{ "fpu-exceptions",		F10H_FPU_EXCEPTIONS, 0, true },
    250 	{ "break-match0",		F10H_DR0_BREAKPOINT_MATCHES,	0, true },
    251 	{ "break-match1",		F10H_DR1_BREAKPOINT_MATCHES,	0, true },
    252 	{ "break-match2",		F10H_DR2_BREAKPOINT_MATCHES,	0, true },
    253 	{ "break-match3",		F10H_DR3_BREAKPOINT_MATCHES,	0, true },
    254 };
    255 
    256 static struct event_table amd_f10h = {
    257 	.tablename = "AMD Family 10h",
    258 	.names = amd_f10h_names,
    259 	.nevents = sizeof(amd_f10h_names) /
    260 	    sizeof(struct name_to_event),
    261 	.next = NULL
    262 };
    263 
    264 static struct event_table *
    265 init_amd_f10h(void)
    266 {
    267 	return &amd_f10h;
    268 }
    269 
/*
 * Select the event table for an AMD CPU from the family derived from
 * CPUID leaf 1.
 *
 * Returns the table for a known family, or NULL when the family has no
 * table in this file.
 */
static struct event_table *
init_amd_generic(void)
{
	unsigned int eax = 0x01, ebx = 0, ecx = 0, edx = 0;

	x86_cpuid(&eax, &ebx, &ecx, &edx);

	/* Family 10h is the only family with a table so far. */
	if (CPUID_TO_FAMILY(eax) == 0x10)
		return init_amd_f10h();

	return NULL;
}
    288 
    289 /* -------------------------------------------------------------------------- */
    290 
    291 int
    292 tprof_event_init(uint32_t ident)
    293 {
    294 	switch (ident) {
    295 	case TPROF_IDENT_NONE:
    296 		return -1;
    297 	case TPROF_IDENT_INTEL_GENERIC:
    298 		cpuevents = init_intel_generic();
    299 		break;
    300 	case TPROF_IDENT_AMD_GENERIC:
    301 		cpuevents = init_amd_generic();
    302 		break;
    303 	}
    304 	return (cpuevents == NULL) ? -1 : 0;
    305 }
    306 
    307 static void
    308 recursive_event_list(struct event_table *table)
    309 {
    310 	size_t i;
    311 
    312 	printf("%s:\n", table->tablename);
    313 	for (i = 0; i < table->nevents; i++) {
    314 		if (!table->names[i].enabled)
    315 			continue;
    316 		printf("\t%s\n", table->names[i].name);
    317 	}
    318 	printf("\n");
    319 
    320 	if (table->next != NULL) {
    321 		recursive_event_list(table->next);
    322 	}
    323 }
    324 
    325 void
    326 tprof_event_list(void)
    327 {
    328 	recursive_event_list(cpuevents);
    329 }
    330 
    331 static void
    332 recursive_event_lookup(struct event_table *table, const char *name,
    333     struct tprof_param *param)
    334 {
    335 	size_t i;
    336 
    337 	for (i = 0; i < table->nevents; i++) {
    338 		if (!table->names[i].enabled)
    339 			continue;
    340 		if (!strcmp(table->names[i].name, name)) {
    341 			param->p_event = table->names[i].event;
    342 			param->p_unit = table->names[i].unit;
    343 			return;
    344 		}
    345 	}
    346 
    347 	if (table->next != NULL) {
    348 		recursive_event_lookup(table->next, name, param);
    349 	} else {
    350 		errx(EXIT_FAILURE, "event '%s' unknown", name);
    351 	}
    352 }
    353 
    354 void
    355 tprof_event_lookup(const char *name, struct tprof_param *param)
    356 {
    357 	recursive_event_lookup(cpuevents, name, param);
    358 }
    359