/* $NetBSD: tprof_armv8.c,v 1.20 2023/04/11 10:07:12 msaitoh Exp $ */

/*-
 * Copyright (c) 2018 Jared McNeill <jmcneill@invisible.ca>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof_armv8.c,v 1.20 2023/04/11 10:07:12 msaitoh Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/percpu.h>
#include <sys/xcall.h>

#include <dev/tprof/tprof.h>

#include <arm/armreg.h>
#include <arm/cpufunc.h>

#include <dev/tprof/tprof_armv8.h>

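/*
 * tprof(4) backend for the ARMv8 Performance Monitors Extension (PMU):
 * event counters are programmed through the PMEVTYPER/PMEVCNTR registers
 * and samples are taken from the PMU overflow interrupt.
 */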
static u_int counter_bitwidth;

/*
 * armv8 can handle up to 31 event counters;
 * PMCR_EL0.N gives the number actually available on this CPU.
 */

static bool
armv8_pmu_event_implemented(uint16_t event)
{
	uint64_t eid[2];

	if (event >= 64)
		return false;

	eid[0] = reg_pmceid0_el0_read();
	eid[1] = reg_pmceid1_el0_read();

	/* The low 32 bits of PMCEID[01]_EL0 contain the common events 0 to n */
	const u_int idx = event / 32;
	const u_int bit = event % 32;

	if (eid[idx] & __BIT(bit))
		return true;

	return false;
}

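/*
 * The event counters are accessed indirectly: PMSELR_EL0.SEL selects a
 * counter, and PMXEVTYPER_EL0/PMXEVCNTR_EL0 then alias the selected
 * PMEVTYPER<n>_EL0/PMEVCNTR<n>_EL0 register.  The isb() ensures the
 * selection has taken effect before the aliased register is accessed.
 */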
static void
armv8_pmu_set_pmevtyper(u_int counter, uint64_t val)
{
	reg_pmselr_el0_write(counter);
	isb();
	reg_pmxevtyper_el0_write(val);
}

static inline void
armv8_pmu_set_pmevcntr(u_int counter, uint64_t val)
{
	reg_pmselr_el0_write(counter);
	isb();
	reg_pmxevcntr_el0_write(val);
}

static inline uint64_t
armv8_pmu_get_pmevcntr(u_int counter)
{
	reg_pmselr_el0_write(counter);
	isb();
	return reg_pmxevcntr_el0_read();
}

/* Read the current count and write a new value in one operation */
static inline uint64_t
armv8_pmu_getset_pmevcntr(u_int counter, uint64_t val)
{
	uint64_t c;

	reg_pmselr_el0_write(counter);
	isb();
	c = reg_pmxevcntr_el0_read();
	reg_pmxevcntr_el0_write(val);
	return c;
}

static uint32_t
armv8_pmu_ncounters(void)
{
	return __SHIFTOUT(reg_pmcr_el0_read(), PMCR_N);
}

static u_int
armv8_pmu_counter_bitwidth(u_int counter)
{
	return counter_bitwidth;
}

static uint64_t
armv8_pmu_counter_estimate_freq(u_int counter)
{
	return curcpu()->ci_data.cpu_cc_freq;
}

static int
armv8_pmu_valid_event(u_int counter, const tprof_param_t *param)
{
	if (!armv8_pmu_event_implemented(param->p_event)) {
		printf("%s: event %#" PRIx64 " not implemented on this CPU\n",
		    __func__, param->p_event);
		return EINVAL;
	}
	return 0;
}

static void
armv8_pmu_configure_event(u_int counter, const tprof_param_t *param)
{
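	/*
	 * The PMCNTEN_P/PMINTEN_P/PMOVS_P masks cover the event counter
	 * bits only, so the cycle counter (PMCCNTR_EL0, bit 31) is never
	 * touched here.
	 */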
	/* Disable event counter */
	reg_pmcntenclr_el0_write(__BIT(counter) & PMCNTEN_P);

	/* Disable overflow interrupts */
	reg_pmintenclr_el1_write(__BIT(counter) & PMINTEN_P);

	/* Configure event counter */
	uint64_t pmevtyper = __SHIFTIN(param->p_event, PMEVTYPER_EVTCOUNT);
	if (!ISSET(param->p_flags, TPROF_PARAM_USER))
		pmevtyper |= PMEVTYPER_U;
	if (!ISSET(param->p_flags, TPROF_PARAM_KERN))
		pmevtyper |= PMEVTYPER_P;
	armv8_pmu_set_pmevtyper(counter, pmevtyper);

	if (ISSET(param->p_flags, TPROF_PARAM_PROFILE) ||
	    counter_bitwidth != 64) {
		/* Enable overflow interrupts */
		reg_pmintenset_el1_write(__BIT(counter) & PMINTEN_P);
	}

	/* Clear overflow flag */
	reg_pmovsclr_el0_write(__BIT(counter) & PMOVS_P);

	/* Reset the counter */
	armv8_pmu_set_pmevcntr(counter, param->p_value);
}

static void
armv8_pmu_start(tprof_countermask_t runmask)
{
	/* Enable event counters */
	reg_pmcntenset_el0_write(runmask & PMCNTEN_P);

	/*
	 * PMCR_EL0.E is shared by PMCCNTR_EL0 and the event counters;
	 * set it here in case nothing else in the system uses PMCCNTR_EL0.
	 */
	reg_pmcr_el0_write(reg_pmcr_el0_read() | PMCR_E);
}

static void
armv8_pmu_stop(tprof_countermask_t stopmask)
{
	/* Disable event counters */
	reg_pmcntenclr_el0_write(stopmask & PMCNTEN_P);
}

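/*
 * armv8_pmu_intr() is called with the trapframe as its argument rather
 * than with the backend softc, so the softc is reached through this
 * static pointer instead.
 */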
/* XXX: argument of armv8_pmu_intr() */
extern struct tprof_backend *tprof_backend;
static void *pmu_intr_arg;

int
armv8_pmu_intr(void *priv)
{
	const struct trapframe * const tf = priv;
	tprof_backend_softc_t *sc = pmu_intr_arg;
	tprof_frame_info_t tfi;
	int bit;
	const uint32_t pmovs = reg_pmovsset_el0_read();

	uint64_t *counters_offset =
	    percpu_getptr_remote(sc->sc_ctr_offset_percpu, curcpu());
	uint32_t mask = pmovs;
	while ((bit = ffs(mask)) != 0) {
		bit--;
		CLR(mask, __BIT(bit));

		if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) {
			/* Account for the counter, and reset */
			uint64_t ctr = armv8_pmu_getset_pmevcntr(bit,
			    sc->sc_count[bit].ctr_counter_reset_val);
			counters_offset[bit] +=
			    sc->sc_count[bit].ctr_counter_val + ctr;

			/* Record a sample */
			tfi.tfi_pc = tf->tf_pc;
			tfi.tfi_counter = bit;
			tfi.tfi_inkernel =
			    tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS &&
			    tfi.tfi_pc < VM_MAX_KERNEL_ADDRESS;
			tprof_sample(NULL, &tfi);
		} else if (ISSET(sc->sc_ctr_ovf_mask, __BIT(bit))) {
			/* A 32-bit counter wrapped; account for 2^32 counts */
			counters_offset[bit] += __BIT(32);
		}
	}
	reg_pmovsclr_el0_write(pmovs);

	return 1;
}

static uint32_t
armv8_pmu_ident(void)
{
	return TPROF_IDENT_ARMV8_GENERIC;
}

static const tprof_backend_ops_t tprof_armv8_pmu_ops = {
	.tbo_ident = armv8_pmu_ident,
	.tbo_ncounters = armv8_pmu_ncounters,
	.tbo_counter_bitwidth = armv8_pmu_counter_bitwidth,
	.tbo_counter_read = armv8_pmu_get_pmevcntr,
	.tbo_counter_estimate_freq = armv8_pmu_counter_estimate_freq,
	.tbo_valid_event = armv8_pmu_valid_event,
	.tbo_configure_event = armv8_pmu_configure_event,
	.tbo_start = armv8_pmu_start,
	.tbo_stop = armv8_pmu_stop,
	.tbo_establish = NULL,
	.tbo_disestablish = NULL,
};

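/*
 * Per-CPU PMU initialisation; run on every CPU via xc_broadcast() from
 * armv8_pmu_init() so each core starts from a known, fully disabled state.
 */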
static void
armv8_pmu_init_cpu(void *arg1, void *arg2)
{
	/* Disable EL0 access to performance monitors */
	reg_pmuserenr_el0_write(0);

	/* Disable interrupts */
	reg_pmintenclr_el1_write(PMINTEN_P);

	/* Disable event counters */
	reg_pmcntenclr_el0_write(PMCNTEN_P);
}

bool
armv8_pmu_detect(void)
{
	const uint64_t dfr0 = reg_id_aa64dfr0_el1_read();
	const u_int pmuver = __SHIFTOUT(dfr0, ID_AA64DFR0_EL1_PMUVER);

	return pmuver != ID_AA64DFR0_EL1_PMUVER_NONE &&
	    pmuver != ID_AA64DFR0_EL1_PMUVER_IMPL;
}

int
armv8_pmu_init(void)
{
	int error, ncounters;

	KASSERT(armv8_pmu_detect());

	ncounters = armv8_pmu_ncounters();
	if (ncounters == 0)
		return ENOTSUP;

	/* Are 64-bit event counters (PMUv3.5, PMCR_EL0.LP) available? */
	const uint64_t dfr0 = reg_id_aa64dfr0_el1_read();
	const u_int pmuver = __SHIFTOUT(dfr0, ID_AA64DFR0_EL1_PMUVER);
	if (pmuver >= ID_AA64DFR0_EL1_PMUVER_V3P5 &&
	    ISSET(reg_pmcr_el0_read(), PMCR_LP))
		counter_bitwidth = 64;
	else
		counter_bitwidth = 32;

	uint64_t xc = xc_broadcast(0, armv8_pmu_init_cpu, NULL, NULL);
	xc_wait(xc);

	error = tprof_backend_register("tprof_armv8", &tprof_armv8_pmu_ops,
	    TPROF_BACKEND_VERSION);
	if (error == 0) {
		/* XXX: for argument of armv8_pmu_intr() */
		pmu_intr_arg = tprof_backend;
	}

	return error;
}