/* $NetBSD: tprof_armv8.c,v 1.20 2023/04/11 10:07:12 msaitoh Exp $ */

/*-
 * Copyright (c) 2018 Jared McNeill <jmcneill@invisible.ca>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof_armv8.c,v 1.20 2023/04/11 10:07:12 msaitoh Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/percpu.h>
#include <sys/xcall.h>

#include <dev/tprof/tprof.h>

#include <arm/armreg.h>
#include <arm/cpufunc.h>

#include <dev/tprof/tprof_armv8.h>

static u_int counter_bitwidth;

/*
 * The ARMv8 PMU architecture defines up to 31 event counters;
 * PMCR_EL0.N reports how many of them this CPU actually implements.
 */

static bool
armv8_pmu_event_implemented(uint16_t event)
{
	uint64_t eid[2];

	if (event >= 64)
		return false;

	eid[0] = reg_pmceid0_el0_read();
	eid[1] = reg_pmceid1_el0_read();

	/*
	 * PMCEID0_EL0[31:0] advertises common events 0-31;
	 * PMCEID1_EL0[31:0] advertises common events 32-63.
	 */
	const u_int idx = event / 32;
	const u_int bit = event % 32;

	if (eid[idx] & __BIT(bit))
		return true;

	return false;
}

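/*
 * The event counters are accessed indirectly: the counter index is
 * written to the selection register PMSELR_EL0, and the selected
 * counter's type and value registers are then accessed through
 * PMXEVTYPER_EL0 and PMXEVCNTR_EL0.  The isb() orders the selection
 * write against the subsequent access.
 */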
static void
armv8_pmu_set_pmevtyper(u_int counter, uint64_t val)
{
	reg_pmselr_el0_write(counter);
	isb();
	reg_pmxevtyper_el0_write(val);
}

static inline void
armv8_pmu_set_pmevcntr(u_int counter, uint64_t val)
{
	reg_pmselr_el0_write(counter);
	isb();
	reg_pmxevcntr_el0_write(val);
}

static inline uint64_t
armv8_pmu_get_pmevcntr(u_int counter)
{
	reg_pmselr_el0_write(counter);
	isb();
	return reg_pmxevcntr_el0_read();
}

/* Read the current value and load a new one in a single selection */
static inline uint64_t
armv8_pmu_getset_pmevcntr(u_int counter, uint64_t val)
{
	uint64_t c;

	reg_pmselr_el0_write(counter);
	isb();
	c = reg_pmxevcntr_el0_read();
	reg_pmxevcntr_el0_write(val);
	return c;
}

static uint32_t
armv8_pmu_ncounters(void)
{
	return __SHIFTOUT(reg_pmcr_el0_read(), PMCR_N);
}

static u_int
armv8_pmu_counter_bitwidth(u_int counter)
{
	return counter_bitwidth;
}

static uint64_t
armv8_pmu_counter_estimate_freq(u_int counter)
{
	return curcpu()->ci_data.cpu_cc_freq;
}

static int
armv8_pmu_valid_event(u_int counter, const tprof_param_t *param)
{
	if (!armv8_pmu_event_implemented(param->p_event)) {
		printf("%s: event %#" PRIx64 " not implemented on this CPU\n",
		    __func__, param->p_event);
		return EINVAL;
	}
	return 0;
}

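/*
 * Example (illustrative only): to count retired instructions in the
 * kernel only, the ARMv8 common event 0x08 (INST_RETIRED) would be
 * requested by setting p_event = 0x08 and p_flags = TPROF_PARAM_KERN
 * in the tprof_param_t passed to armv8_pmu_configure_event().
 */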
static void
armv8_pmu_configure_event(u_int counter, const tprof_param_t *param)
{
	/* Disable event counter */
	reg_pmcntenclr_el0_write(__BIT(counter) & PMCNTEN_P);

	/* Disable overflow interrupts */
	reg_pmintenclr_el1_write(__BIT(counter) & PMINTEN_P);

	/* Configure event counter */
	uint64_t pmevtyper = __SHIFTIN(param->p_event, PMEVTYPER_EVTCOUNT);
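	/*
	 * PMEVTYPER_EL0.U and .P are exclusion bits: setting U suppresses
	 * counting at EL0 and setting P suppresses counting at EL1, hence
	 * the inverted flag tests below.
	 */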
	if (!ISSET(param->p_flags, TPROF_PARAM_USER))
		pmevtyper |= PMEVTYPER_U;
	if (!ISSET(param->p_flags, TPROF_PARAM_KERN))
		pmevtyper |= PMEVTYPER_P;
	armv8_pmu_set_pmevtyper(counter, pmevtyper);

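	/*
	 * Overflow interrupts are needed when profiling, and also for
	 * 32-bit counters, whose wraps must be accumulated into the
	 * 64-bit software view by armv8_pmu_intr().
	 */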
	if (ISSET(param->p_flags, TPROF_PARAM_PROFILE) ||
	    counter_bitwidth != 64) {
		/* Enable overflow interrupts */
		reg_pmintenset_el1_write(__BIT(counter) & PMINTEN_P);
	}

	/* Clear the overflow flag */
	reg_pmovsclr_el0_write(__BIT(counter) & PMOVS_P);

	/* Load the counter's start value */
	armv8_pmu_set_pmevcntr(counter, param->p_value);
}

static void
armv8_pmu_start(tprof_countermask_t runmask)
{
	/* Enable event counters */
	reg_pmcntenset_el0_write(runmask & PMCNTEN_P);

	/*
	 * PMCR_EL0.E is a global enable shared by PMCCNTR_EL0 and all
	 * event counters; set it here in case nothing else in the
	 * system has enabled the PMU.
	 */
	reg_pmcr_el0_write(reg_pmcr_el0_read() | PMCR_E);
}

static void
armv8_pmu_stop(tprof_countermask_t stopmask)
{
	/* Disable event counters */
	reg_pmcntenclr_el0_write(stopmask & PMCNTEN_P);
}

/*
 * XXX: armv8_pmu_intr() receives the trapframe as its argument, so the
 * backend softc has to be obtained through these globals instead.
 */
extern struct tprof_backend *tprof_backend;
static void *pmu_intr_arg;

int
armv8_pmu_intr(void *priv)
{
	const struct trapframe * const tf = priv;
	tprof_backend_softc_t *sc = pmu_intr_arg;
	tprof_frame_info_t tfi;
	int bit;
	const uint32_t pmovs = reg_pmovsset_el0_read();

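	/*
	 * Per-CPU software offsets that extend the hardware counters
	 * to 64 bits.
	 */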
	uint64_t *counters_offset =
	    percpu_getptr_remote(sc->sc_ctr_offset_percpu, curcpu());
	uint32_t mask = pmovs;
	while ((bit = ffs(mask)) != 0) {
		bit--;
		CLR(mask, __BIT(bit));

		if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) {
			/* Read and reset the counter, accumulating its offset */
			uint64_t ctr = armv8_pmu_getset_pmevcntr(bit,
			    sc->sc_count[bit].ctr_counter_reset_val);
			counters_offset[bit] +=
			    sc->sc_count[bit].ctr_counter_val + ctr;

			/* Record a sample */
			tfi.tfi_pc = tf->tf_pc;
			tfi.tfi_counter = bit;
			tfi.tfi_inkernel =
			    tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS &&
			    tfi.tfi_pc < VM_MAX_KERNEL_ADDRESS;
			tprof_sample(NULL, &tfi);
		} else if (ISSET(sc->sc_ctr_ovf_mask, __BIT(bit))) {
			/* A 32-bit counter wrapped; carry into bit 32 */
			counters_offset[bit] += __BIT(32);
		}
	}
	reg_pmovsclr_el0_write(pmovs);

	return 1;
}

static uint32_t
armv8_pmu_ident(void)
{
	return TPROF_IDENT_ARMV8_GENERIC;
}

static const tprof_backend_ops_t tprof_armv8_pmu_ops = {
	.tbo_ident = armv8_pmu_ident,
	.tbo_ncounters = armv8_pmu_ncounters,
	.tbo_counter_bitwidth = armv8_pmu_counter_bitwidth,
	.tbo_counter_read = armv8_pmu_get_pmevcntr,
	.tbo_counter_estimate_freq = armv8_pmu_counter_estimate_freq,
	.tbo_valid_event = armv8_pmu_valid_event,
	.tbo_configure_event = armv8_pmu_configure_event,
	.tbo_start = armv8_pmu_start,
	.tbo_stop = armv8_pmu_stop,
	.tbo_establish = NULL,
	.tbo_disestablish = NULL,
};

static void
armv8_pmu_init_cpu(void *arg1, void *arg2)
{
	/* Disable EL0 access to performance monitors */
	reg_pmuserenr_el0_write(0);

	/* Disable interrupts */
	reg_pmintenclr_el1_write(PMINTEN_P);

	/* Disable event counters */
	reg_pmcntenclr_el0_write(PMCNTEN_P);
}

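/*
 * Glue for machine-dependent code: a platform driver (e.g. NetBSD's
 * FDT PMU attachment) is expected to check armv8_pmu_detect(), arrange
 * for the PMU overflow interrupt to invoke armv8_pmu_intr(), and call
 * armv8_pmu_init() to register the backend.
 */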
bool
armv8_pmu_detect(void)
{
	const uint64_t dfr0 = reg_id_aa64dfr0_el1_read();
	const u_int pmuver = __SHIFTOUT(dfr0, ID_AA64DFR0_EL1_PMUVER);

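	/*
	 * PMUVer 0x0 means no PMU is implemented; 0xf means an
	 * IMPLEMENTATION DEFINED PMU that this driver cannot program.
	 */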
	return pmuver != ID_AA64DFR0_EL1_PMUVER_NONE &&
	       pmuver != ID_AA64DFR0_EL1_PMUVER_IMPL;
}

int
armv8_pmu_init(void)
{
	int error, ncounters;

	KASSERT(armv8_pmu_detect());

	ncounters = armv8_pmu_ncounters();
	if (ncounters == 0)
		return ENOTSUP;

	/*
	 * 64-bit event counters are available if FEAT_PMUv3p5 is
	 * implemented and PMCR_EL0.LP is set.
	 */
	const uint64_t dfr0 = reg_id_aa64dfr0_el1_read();
	const u_int pmuver = __SHIFTOUT(dfr0, ID_AA64DFR0_EL1_PMUVER);
	if (pmuver >= ID_AA64DFR0_EL1_PMUVER_V3P5 &&
	    ISSET(reg_pmcr_el0_read(), PMCR_LP))
		counter_bitwidth = 64;
	else
		counter_bitwidth = 32;

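	/* Reset the PMU to a quiescent state on all CPUs */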
	uint64_t xc = xc_broadcast(0, armv8_pmu_init_cpu, NULL, NULL);
	xc_wait(xc);

	error = tprof_backend_register("tprof_armv8", &tprof_armv8_pmu_ops,
	    TPROF_BACKEND_VERSION);
	if (error == 0) {
		/* XXX: for argument of armv8_pmu_intr() */
		pmu_intr_arg = tprof_backend;
	}

	return error;
}
    308