Home | History | Annotate | Line # | Download | only in tprof
tprof_armv7.c revision 1.10
      1  1.10       ryo /* $NetBSD: tprof_armv7.c,v 1.10 2022/12/01 00:32:52 ryo Exp $ */
      2   1.1  jmcneill 
      3   1.1  jmcneill /*-
      4   1.1  jmcneill  * Copyright (c) 2018 Jared McNeill <jmcneill (at) invisible.ca>
      5   1.1  jmcneill  * All rights reserved.
      6   1.1  jmcneill  *
      7   1.1  jmcneill  * Redistribution and use in source and binary forms, with or without
      8   1.1  jmcneill  * modification, are permitted provided that the following conditions
      9   1.1  jmcneill  * are met:
     10   1.1  jmcneill  * 1. Redistributions of source code must retain the above copyright
     11   1.1  jmcneill  *    notice, this list of conditions and the following disclaimer.
     12   1.1  jmcneill  * 2. Redistributions in binary form must reproduce the above copyright
     13   1.1  jmcneill  *    notice, this list of conditions and the following disclaimer in the
     14   1.1  jmcneill  *    documentation and/or other materials provided with the distribution.
     15   1.1  jmcneill  *
     16   1.1  jmcneill  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17   1.1  jmcneill  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18   1.1  jmcneill  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19   1.1  jmcneill  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20   1.1  jmcneill  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     21   1.1  jmcneill  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     22   1.1  jmcneill  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
     23   1.1  jmcneill  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24   1.1  jmcneill  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25   1.1  jmcneill  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26   1.1  jmcneill  * SUCH DAMAGE.
     27   1.1  jmcneill  */
     28   1.1  jmcneill 
     29   1.1  jmcneill #include <sys/cdefs.h>
     30  1.10       ryo __KERNEL_RCSID(0, "$NetBSD: tprof_armv7.c,v 1.10 2022/12/01 00:32:52 ryo Exp $");
     31   1.1  jmcneill 
     32   1.1  jmcneill #include <sys/param.h>
     33   1.1  jmcneill #include <sys/bus.h>
     34   1.1  jmcneill #include <sys/cpu.h>
     35  1.10       ryo #include <sys/percpu.h>
     36   1.1  jmcneill #include <sys/xcall.h>
     37   1.1  jmcneill 
     38   1.1  jmcneill #include <dev/tprof/tprof.h>
     39   1.1  jmcneill 
     40   1.1  jmcneill #include <arm/armreg.h>
     41   1.1  jmcneill #include <arm/locore.h>
     42   1.1  jmcneill 
     43   1.1  jmcneill #include <dev/tprof/tprof_armv7.h>
     44   1.1  jmcneill 
/* PMCR, Performance Monitors Control Register */
#define	PMCR_N			__BITS(15,11)	/* number of event counters */
#define	PMCR_D			__BIT(3)	/* cycle divider: count every 64th */
#define	PMCR_E			__BIT(0)	/* global enable for all counters */

/* PMINTENSET/CLR and PMCNTENSET/CLR: bit 31 is the cycle counter, */
/* bits 30..0 are the programmable event counters. */
#define	PMINTEN_C		__BIT(31)	/* cycle counter overflow intr */
#define	PMINTEN_P		__BITS(30,0)	/* event counter overflow intrs */
#define	PMCNTEN_C		__BIT(31)	/* cycle counter enable */
#define	PMCNTEN_P		__BITS(30,0)	/* event counter enables */

/* PMOVSR, overflow flag status */
#define	PMOVS_C			__BIT(31)	/* cycle counter overflow flag */
#define	PMOVS_P			__BITS(30,0)	/* event counter overflow flags */

/* PMXEVTYPER; U and P are "exclude" bits, not "include" */
#define	PMEVTYPER_P		__BIT(31)	/* don't count at PL1 (kernel) */
#define	PMEVTYPER_U		__BIT(30)	/* don't count at PL0 (user) */
#define	PMEVTYPER_EVTCOUNT	__BITS(7,0)	/* event number to count */
     60   1.1  jmcneill 
/*
 * Implementation-defined event numbers of the Cortex-A9.  The A9
 * implements PMUv1, which lacks the PMCEID0/1 ID registers, so
 * armv7_pmu_event_implemented() checks against this fixed table
 * instead of probing the hardware.
 */
static uint16_t cortexa9_events[] = {
	0x40, 0x41, 0x42,
	0x50, 0x51,
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
	0x6e,
	0x70, 0x71, 0x72, 0x73, 0x74,
	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86,
	0x8a, 0x8b,
	0x90, 0x91, 0x92, 0x93,
	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5
};
     72   1.7  jmcneill 
     73   1.1  jmcneill static bool
     74   1.1  jmcneill armv7_pmu_event_implemented(uint16_t event)
     75   1.1  jmcneill {
     76   1.7  jmcneill 	if (CPU_ID_CORTEX_A9_P(curcpu()->ci_midr)) {
     77   1.7  jmcneill 		/* Cortex-A9 with PMUv1 lacks PMCEID0/1 */
     78   1.7  jmcneill 		u_int n;
     79   1.7  jmcneill 
     80   1.7  jmcneill 		/* Events specific to the Cortex-A9 */
     81   1.7  jmcneill 		for (n = 0; n < __arraycount(cortexa9_events); n++) {
     82   1.7  jmcneill 			if (cortexa9_events[n] == event) {
     83   1.7  jmcneill 				return true;
     84   1.7  jmcneill 			}
     85   1.7  jmcneill 		}
     86   1.7  jmcneill 		/* Supported architectural events */
     87   1.7  jmcneill 		if (event != 0x08 && event != 0x0e && event < 0x1e) {
     88   1.7  jmcneill 			return true;
     89   1.7  jmcneill 		}
     90   1.7  jmcneill 	} else {
     91   1.7  jmcneill 		/* PMUv2 */
     92   1.7  jmcneill 		uint32_t eid[2];
     93   1.7  jmcneill 
     94   1.7  jmcneill 		if (event >= 64) {
     95   1.7  jmcneill 			return false;
     96   1.7  jmcneill 		}
     97   1.7  jmcneill 
     98   1.7  jmcneill 		eid[0] = armreg_pmceid0_read();
     99   1.7  jmcneill 		eid[1] = armreg_pmceid1_read();
    100   1.7  jmcneill 
    101   1.7  jmcneill 		const u_int idx = event / 32;
    102   1.7  jmcneill 		const u_int bit = event % 32;
    103   1.7  jmcneill 
    104   1.7  jmcneill 		if (eid[idx] & __BIT(bit)) {
    105   1.7  jmcneill 			return true;
    106   1.7  jmcneill 		}
    107   1.7  jmcneill 	}
    108   1.1  jmcneill 
    109   1.1  jmcneill 	return false;
    110   1.1  jmcneill }
    111   1.1  jmcneill 
/*
 * Program the event type register of the given counter.  The counter
 * is selected via PMSELR; the isb() guarantees the selection is
 * visible before the indirect PMXEVTYPER access.
 */
static void
armv7_pmu_set_pmevtyper(u_int counter, uint64_t val)
{
	armreg_pmselr_write(counter);
	isb();
	armreg_pmxevtyper_write(val);
}
    119   1.1  jmcneill 
/*
 * Load the given event counter with val.  Selection via PMSELR must be
 * ordered before the indirect PMXEVCNTR write, hence the isb().
 */
static inline void
armv7_pmu_set_pmevcntr(u_int counter, uint32_t val)
{
	armreg_pmselr_write(counter);
	isb();
	armreg_pmxevcntr_write(val);
}
    127   1.1  jmcneill 
/*
 * Read the current value of the given event counter.  Also used
 * directly as the backend's tbo_counter_read hook.
 */
static inline uint64_t
armv7_pmu_get_pmevcntr(u_int counter)
{
	armreg_pmselr_write(counter);
	isb();
	return armreg_pmxevcntr_read();
}
    135   1.1  jmcneill 
/*
 * Read and write at once: fetch the given event counter's value, then
 * reload it with val, under a single PMSELR selection.  Returns the
 * value read.  Used by the interrupt handler to reset a counter while
 * accounting what it had counted.
 */
static inline uint64_t
armv7_pmu_getset_pmevcntr(u_int counter, uint64_t val)
{
	uint64_t c;

	armreg_pmselr_write(counter);
	isb();
	c = armreg_pmxevcntr_read();
	armreg_pmxevcntr_write(val);
	return c;
}
    148   1.1  jmcneill 
/* Number of event counters this PMU implements, from PMCR.N. */
static uint32_t
armv7_pmu_ncounters(void)
{
	return __SHIFTOUT(armreg_pmcr_read(), PMCR_N);
}
    154   1.1  jmcneill 
    155  1.10       ryo static u_int
    156  1.10       ryo armv7_pmu_counter_bitwidth(u_int counter)
    157   1.1  jmcneill {
    158  1.10       ryo 	return 32;
    159   1.1  jmcneill }
    160   1.1  jmcneill 
    161   1.1  jmcneill static uint64_t
    162  1.10       ryo armv7_pmu_counter_estimate_freq(u_int counter)
    163   1.1  jmcneill {
    164   1.1  jmcneill 	uint64_t cpufreq = curcpu()->ci_data.cpu_cc_freq;
    165   1.1  jmcneill 
    166  1.10       ryo 	if (ISSET(armreg_pmcr_read(), PMCR_D))
    167  1.10       ryo 		cpufreq /= 64;
    168  1.10       ryo 	return cpufreq;
    169   1.1  jmcneill }
    170   1.1  jmcneill 
    171   1.1  jmcneill static int
    172  1.10       ryo armv7_pmu_valid_event(u_int counter, const tprof_param_t *param)
    173   1.1  jmcneill {
    174   1.1  jmcneill 	if (!armv7_pmu_event_implemented(param->p_event)) {
    175  1.10       ryo 		printf("%s: event %#" PRIx64 " not implemented on this CPU\n",
    176   1.1  jmcneill 		    __func__, param->p_event);
    177   1.1  jmcneill 		return EINVAL;
    178   1.1  jmcneill 	}
    179  1.10       ryo 	return 0;
    180  1.10       ryo }
    181   1.1  jmcneill 
/*
 * Program the given event counter from param: event number, privilege
 * filtering, overflow interrupt, and initial count.  The counter is
 * left disabled; armv7_pmu_start() enables it later.
 */
static void
armv7_pmu_configure_event(u_int counter, const tprof_param_t *param)
{
	/* Disable event counter */
	armreg_pmcntenclr_write(__BIT(counter) & PMCNTEN_P);

	/* Disable overflow interrupts */
	armreg_pmintenclr_write(__BIT(counter) & PMINTEN_P);

	/*
	 * Configure event counter.  PMEVTYPER.U/P are exclusion bits:
	 * set U unless user-mode counting was requested, set P unless
	 * kernel-mode counting was requested.
	 */
	uint32_t pmevtyper = __SHIFTIN(param->p_event, PMEVTYPER_EVTCOUNT);
	if (!ISSET(param->p_flags, TPROF_PARAM_USER))
		pmevtyper |= PMEVTYPER_U;
	if (!ISSET(param->p_flags, TPROF_PARAM_KERN))
		pmevtyper |= PMEVTYPER_P;
	armv7_pmu_set_pmevtyper(counter, pmevtyper);

	/*
	 * Enable overflow interrupts.
	 * Whether profiled or not, the counter width of armv7 is 32 bits,
	 * so overflow handling is required anyway.
	 */
	armreg_pmintenset_write(__BIT(counter) & PMINTEN_P);

	/* Clear overflow flag */
	armreg_pmovsr_write(__BIT(counter) & PMOVS_P);

	/* reset the counter */
	armv7_pmu_set_pmevcntr(counter, param->p_value);
}
    212   1.1  jmcneill 
/*
 * Enable the event counters selected by runmask, and make sure the
 * PMU's global enable bit is set.
 */
static void
armv7_pmu_start(tprof_countermask_t runmask)
{
	/* Enable event counters */
	armreg_pmcntenset_write(runmask & PMCNTEN_P);

	/*
	 * PMCR.E is shared with PMCCNTR and event counters.
	 * It is set here in case PMCCNTR is not used in the system.
	 */
	armreg_pmcr_write(armreg_pmcr_read() | PMCR_E);
}
    225   1.1  jmcneill 
/*
 * Disable the event counters selected by stopmask.  PMCR.E is left
 * alone since it is shared with PMCCNTR and the other counters.
 */
static void
armv7_pmu_stop(tprof_countermask_t stopmask)
{
	/* Disable event counter */
	armreg_pmcntenclr_write(stopmask & PMCNTEN_P);
}
    232   1.1  jmcneill 
/*
 * XXX: argument of armv7_pmu_intr(); stashed at registration time in
 * armv7_pmu_init() because the interrupt handler only receives the
 * trapframe as its argument.
 */
extern struct tprof_backend *tprof_backend;
static void *pmu_intr_arg;
    236   1.1  jmcneill 
/*
 * PMU overflow interrupt handler; priv is the trapframe of the
 * interrupted context.  For each overflowed counter: if it is being
 * profiled, take a tprof sample and reload the counter from its reset
 * value; otherwise just fold the 2^32 wrap into the per-CPU offset so
 * 64-bit counter reads stay monotonic.
 */
int
armv7_pmu_intr(void *priv)
{
	const struct trapframe * const tf = priv;
	tprof_backend_softc_t *sc = pmu_intr_arg;
	tprof_frame_info_t tfi;
	int bit;
	/* snapshot of pending overflow flags (event counters only) */
	const uint32_t pmovs = armreg_pmovsr_read() & PMOVS_P;

	uint64_t *counters_offset =
	    percpu_getptr_remote(sc->sc_ctr_offset_percpu, curcpu());
	uint32_t mask = pmovs;
	while ((bit = ffs(mask)) != 0) {
		bit--;
		CLR(mask, __BIT(bit));

		if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) {
			/* account for the counter, and reset */
			uint64_t ctr = armv7_pmu_getset_pmevcntr(bit,
			    sc->sc_count[bit].ctr_counter_reset_val);
			counters_offset[bit] +=
			    sc->sc_count[bit].ctr_counter_val + ctr;

			/* record a sample */
			tfi.tfi_pc = tf->tf_pc;
			tfi.tfi_counter = bit;
			tfi.tfi_inkernel =
			    tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS &&
			    tfi.tfi_pc < VM_MAX_KERNEL_ADDRESS;
			tprof_sample(NULL, &tfi);
		} else {
			/* counter has overflowed */
			counters_offset[bit] += __BIT(32);
		}
	}
	/* acknowledge only the overflows observed above */
	armreg_pmovsr_write(pmovs);

	return 1;
}
    276   1.1  jmcneill 
    277  1.10       ryo static uint32_t
    278  1.10       ryo armv7_pmu_ident(void)
    279  1.10       ryo {
    280  1.10       ryo 	return TPROF_IDENT_ARMV7_GENERIC;
    281  1.10       ryo }
    282  1.10       ryo 
/* tprof backend operations provided by the ARMv7 PMU driver. */
static const tprof_backend_ops_t tprof_armv7_pmu_ops = {
	.tbo_ident = armv7_pmu_ident,
	.tbo_ncounters = armv7_pmu_ncounters,
	.tbo_counter_bitwidth = armv7_pmu_counter_bitwidth,
	.tbo_counter_read = armv7_pmu_get_pmevcntr,
	.tbo_counter_estimate_freq = armv7_pmu_counter_estimate_freq,
	.tbo_valid_event = armv7_pmu_valid_event,
	.tbo_configure_event = armv7_pmu_configure_event,
	.tbo_start = armv7_pmu_start,
	.tbo_stop = armv7_pmu_stop,
	.tbo_establish = NULL,
	.tbo_disestablish = NULL,
};
    296  1.10       ryo 
/*
 * Per-CPU PMU setup, broadcast to all CPUs via xcall from
 * armv7_pmu_init(): put the PMU into a known quiescent state.
 * arg1/arg2 are unused xcall arguments.
 */
static void
armv7_pmu_init_cpu(void *arg1, void *arg2)
{
	/* Disable user mode access to performance monitors */
	armreg_pmuserenr_write(0);

	/* Disable interrupts */
	armreg_pmintenclr_write(PMINTEN_P);

	/* Disable counters */
	armreg_pmcntenclr_write(PMCNTEN_P);
}
    309   1.9       ryo 
/*
 * Initialize the ARMv7 PMU backend and register it with tprof.
 * Returns ENOTSUP when the PMU implements no event counters, otherwise
 * the result of tprof_backend_register() (0 on success).
 */
int
armv7_pmu_init(void)
{
	int error, ncounters;

	ncounters = armv7_pmu_ncounters();
	if (ncounters == 0)
		return ENOTSUP;

	/* quiesce the PMU on every CPU before registering */
	uint64_t xc = xc_broadcast(0, armv7_pmu_init_cpu, NULL, NULL);
	xc_wait(xc);

	error = tprof_backend_register("tprof_armv7", &tprof_armv7_pmu_ops,
	    TPROF_BACKEND_VERSION);
	if (error == 0) {
		/* XXX: for argument of armv7_pmu_intr() */
		pmu_intr_arg = tprof_backend;
	}

	return error;
}
    331