/* $NetBSD: tprof_armv8.c,v 1.20 2023/04/11 10:07:12 msaitoh Exp $ */

/*-
 * Copyright (c) 2018 Jared McNeill <jmcneill (at) invisible.ca>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof_armv8.c,v 1.20 2023/04/11 10:07:12 msaitoh Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/percpu.h>
#include <sys/xcall.h>

#include <dev/tprof/tprof.h>

#include <arm/armreg.h>
#include <arm/cpufunc.h>

#include <dev/tprof/tprof_armv8.h>

static u_int counter_bitwidth;

/*
 * armv8 supports up to 31 event counters;
 * PMCR_EL0.N reports how many are actually implemented.
 */

static bool
armv8_pmu_event_implemented(uint16_t event)
{
	uint64_t eid[2];

	if (event >= 64)
		return false;

	eid[0] = reg_pmceid0_el0_read();
	eid[1] = reg_pmceid1_el0_read();

	/* The low 32 bits of PMCEID[01]_EL0 contain the common events 0 to n */
	const u_int idx = event / 32;
	const u_int bit = event % 32;

	if (eid[idx] & __BIT(bit))
		return true;

	return false;
}

/* Select the counter with PMSELR_EL0, then program its event type */
static void
armv8_pmu_set_pmevtyper(u_int counter, uint64_t val)
{
	reg_pmselr_el0_write(counter);
	isb();
	reg_pmxevtyper_el0_write(val);
}

static inline void
armv8_pmu_set_pmevcntr(u_int counter, uint64_t val)
{
	reg_pmselr_el0_write(counter);
	isb();
	reg_pmxevcntr_el0_write(val);
}

static inline uint64_t
armv8_pmu_get_pmevcntr(u_int counter)
{
	reg_pmselr_el0_write(counter);
	isb();
	return reg_pmxevcntr_el0_read();
}

/* Read and write at once */
static inline uint64_t
armv8_pmu_getset_pmevcntr(u_int counter, uint64_t val)
{
	uint64_t c;

	reg_pmselr_el0_write(counter);
	isb();
	c = reg_pmxevcntr_el0_read();
	reg_pmxevcntr_el0_write(val);
	return c;
}

static uint32_t
armv8_pmu_ncounters(void)
{
	return __SHIFTOUT(reg_pmcr_el0_read(), PMCR_N);
}

static u_int
armv8_pmu_counter_bitwidth(u_int counter)
{
	return counter_bitwidth;
}

static uint64_t
armv8_pmu_counter_estimate_freq(u_int counter)
{
	return curcpu()->ci_data.cpu_cc_freq;
}

static int
armv8_pmu_valid_event(u_int counter, const tprof_param_t *param)
{
	if (!armv8_pmu_event_implemented(param->p_event)) {
		printf("%s: event %#" PRIx64 " not implemented on this CPU\n",
		    __func__, param->p_event);
		return EINVAL;
	}
	return 0;
}

static void
armv8_pmu_configure_event(u_int counter, const tprof_param_t *param)
{
	/* Disable event counter */
	reg_pmcntenclr_el0_write(__BIT(counter) & PMCNTEN_P);

	/* Disable overflow interrupts */
	reg_pmintenclr_el1_write(__BIT(counter) & PMINTEN_P);

	/* Configure event counter */
	uint64_t pmevtyper = __SHIFTIN(param->p_event, PMEVTYPER_EVTCOUNT);
	if (!ISSET(param->p_flags, TPROF_PARAM_USER))
		pmevtyper |= PMEVTYPER_U;
	if (!ISSET(param->p_flags, TPROF_PARAM_KERN))
		pmevtyper |= PMEVTYPER_P;
	armv8_pmu_set_pmevtyper(counter, pmevtyper);

	if (ISSET(param->p_flags, TPROF_PARAM_PROFILE) ||
	    counter_bitwidth != 64) {
		/* Enable overflow interrupts */
		reg_pmintenset_el1_write(__BIT(counter) & PMINTEN_P);
	}

	/* Clear overflow flag */
	reg_pmovsclr_el0_write(__BIT(counter) & PMOVS_P);

	/* Reset the counter */
	armv8_pmu_set_pmevcntr(counter, param->p_value);
}

static void
armv8_pmu_start(tprof_countermask_t runmask)
{
	/* Enable event counters */
	reg_pmcntenset_el0_write(runmask & PMCNTEN_P);

	/*
	 * PMCR_EL0.E is shared by PMCCNTR_EL0 and the event counters;
	 * set it here in case PMCCNTR_EL0 is not used in the system.
	 */
	reg_pmcr_el0_write(reg_pmcr_el0_read() | PMCR_E);
}

static void
armv8_pmu_stop(tprof_countermask_t stopmask)
{
	/* Disable event counters */
	reg_pmcntenclr_el0_write(stopmask & PMCNTEN_P);
}

/* XXX: argument of armv8_pmu_intr() */
extern struct tprof_backend *tprof_backend;
static void *pmu_intr_arg;

int
armv8_pmu_intr(void *priv)
{
	const struct trapframe * const tf = priv;
	tprof_backend_softc_t *sc = pmu_intr_arg;
	tprof_frame_info_t tfi;
	int bit;
	const uint32_t pmovs = reg_pmovsset_el0_read();

	uint64_t *counters_offset =
	    percpu_getptr_remote(sc->sc_ctr_offset_percpu, curcpu());
	uint32_t mask = pmovs;
	while ((bit = ffs(mask)) != 0) {
		bit--;
		CLR(mask, __BIT(bit));

		if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) {
			/* Account for the counter, and reset it */
			uint64_t ctr = armv8_pmu_getset_pmevcntr(bit,
			    sc->sc_count[bit].ctr_counter_reset_val);
			counters_offset[bit] +=
			    sc->sc_count[bit].ctr_counter_val + ctr;

			/* Record a sample */
			tfi.tfi_pc = tf->tf_pc;
			tfi.tfi_counter = bit;
			tfi.tfi_inkernel =
			    tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS &&
			    tfi.tfi_pc < VM_MAX_KERNEL_ADDRESS;
			tprof_sample(NULL, &tfi);
		} else if (ISSET(sc->sc_ctr_ovf_mask, __BIT(bit))) {
			/* Counter overflowed: account for the 32-bit wrap */
			counters_offset[bit] += __BIT(32);
		}
	}
	reg_pmovsclr_el0_write(pmovs);

	return 1;
}

static uint32_t
armv8_pmu_ident(void)
{
	return TPROF_IDENT_ARMV8_GENERIC;
}

static const tprof_backend_ops_t tprof_armv8_pmu_ops = {
	.tbo_ident = armv8_pmu_ident,
	.tbo_ncounters = armv8_pmu_ncounters,
	.tbo_counter_bitwidth = armv8_pmu_counter_bitwidth,
	.tbo_counter_read = armv8_pmu_get_pmevcntr,
	.tbo_counter_estimate_freq = armv8_pmu_counter_estimate_freq,
	.tbo_valid_event = armv8_pmu_valid_event,
	.tbo_configure_event = armv8_pmu_configure_event,
	.tbo_start = armv8_pmu_start,
	.tbo_stop = armv8_pmu_stop,
	.tbo_establish = NULL,
	.tbo_disestablish = NULL,
};

static void
armv8_pmu_init_cpu(void *arg1, void *arg2)
{
	/* Disable EL0 access to performance monitors */
	reg_pmuserenr_el0_write(0);

	/* Disable interrupts */
	reg_pmintenclr_el1_write(PMINTEN_P);

	/* Disable event counters */
	reg_pmcntenclr_el0_write(PMCNTEN_P);
}

bool
armv8_pmu_detect(void)
{
	const uint64_t dfr0 = reg_id_aa64dfr0_el1_read();
	const u_int pmuver = __SHIFTOUT(dfr0, ID_AA64DFR0_EL1_PMUVER);

	return pmuver != ID_AA64DFR0_EL1_PMUVER_NONE &&
	       pmuver != ID_AA64DFR0_EL1_PMUVER_IMPL;
}

int
armv8_pmu_init(void)
{
	int error, ncounters;

	KASSERT(armv8_pmu_detect());

	ncounters = armv8_pmu_ncounters();
	if (ncounters == 0)
		return ENOTSUP;

	/* Are 64-bit event counters available? */
	const uint64_t dfr0 = reg_id_aa64dfr0_el1_read();
	const u_int pmuver = __SHIFTOUT(dfr0, ID_AA64DFR0_EL1_PMUVER);
	if (pmuver >= ID_AA64DFR0_EL1_PMUVER_V3P5 &&
	    ISSET(reg_pmcr_el0_read(), PMCR_LP))
		counter_bitwidth = 64;
	else
		counter_bitwidth = 32;

	uint64_t xc = xc_broadcast(0, armv8_pmu_init_cpu, NULL, NULL);
	xc_wait(xc);

	error = tprof_backend_register("tprof_armv8", &tprof_armv8_pmu_ops,
	    TPROF_BACKEND_VERSION);
	if (error == 0) {
		/* XXX: for argument of armv8_pmu_intr() */
		pmu_intr_arg = tprof_backend;
	}

	return error;
}