/* $NetBSD: tprof_armv7.c,v 1.12 2022/12/22 06:59:32 ryo Exp $ */

/*-
 * Copyright (c) 2018 Jared McNeill <jmcneill (at) invisible.ca>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof_armv7.c,v 1.12 2022/12/22 06:59:32 ryo Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/percpu.h>
#include <sys/xcall.h>

#include <dev/tprof/tprof.h>

#include <arm/armreg.h>
#include <arm/locore.h>

#include <dev/tprof/tprof_armv7.h>

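/*
 * IMPLEMENTATION DEFINED PMU event numbers for the Cortex-A9, which
 * predates the PMCEIDn identification registers; the list below is
 * believed to follow the event tables in the Cortex-A9 TRM.
 */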
static uint16_t cortexa9_events[] = {
	0x40, 0x41, 0x42,
	0x50, 0x51,
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
	0x6e,
	0x70, 0x71, 0x72, 0x73, 0x74,
	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86,
	0x8a, 0x8b,
	0x90, 0x91, 0x92, 0x93,
	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5
};

static bool
armv7_pmu_event_implemented(uint16_t event)
{
	if (CPU_ID_CORTEX_A9_P(curcpu()->ci_midr)) {
		/* Cortex-A9 with PMUv1 lacks PMCEID0/1 */
		u_int n;

		/* Events specific to the Cortex-A9 */
		for (n = 0; n < __arraycount(cortexa9_events); n++) {
			if (cortexa9_events[n] == event) {
				return true;
			}
		}
		/* Supported architectural events */
		if (event != 0x08 && event != 0x0e && event < 0x1e) {
			return true;
		}
	} else {
		/* PMUv2 */
		uint32_t eid[2];

		if (event >= 64) {
			return false;
		}

		eid[0] = armreg_pmceid0_read();
		eid[1] = armreg_pmceid1_read();

		const u_int idx = event / 32;
		const u_int bit = event % 32;

		if (eid[idx] & __BIT(bit)) {
			return true;
		}
	}

	return false;
}

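/*
 * ARMv7 reaches the per-counter registers indirectly: the counter
 * index is written to PMSELR, an isb() synchronizes the selection,
 * and PMXEVTYPER/PMXEVCNTR then access the selected counter.
 */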
static void
armv7_pmu_set_pmevtyper(u_int counter, uint64_t val)
{
	armreg_pmselr_write(counter);
	isb();
	armreg_pmxevtyper_write(val);
}

static inline void
armv7_pmu_set_pmevcntr(u_int counter, uint32_t val)
{
	armreg_pmselr_write(counter);
	isb();
	armreg_pmxevcntr_write(val);
}

static inline uint64_t
armv7_pmu_get_pmevcntr(u_int counter)
{
	armreg_pmselr_write(counter);
	isb();
	return armreg_pmxevcntr_read();
}

/* Read the old value and write the new one with a single selection */
static inline uint64_t
armv7_pmu_getset_pmevcntr(u_int counter, uint64_t val)
{
	uint64_t c;

	armreg_pmselr_write(counter);
	isb();
	c = armreg_pmxevcntr_read();
	armreg_pmxevcntr_write(val);
	return c;
}

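/*
 * PMCR.N reports the number of event counters implemented; the cycle
 * counter (PMCCNTR) is separate and not included in this count.
 */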
static uint32_t
armv7_pmu_ncounters(void)
{
	return __SHIFTOUT(armreg_pmcr_read(), PMCR_N);
}

static u_int
armv7_pmu_counter_bitwidth(u_int counter)
{
	return 32;
}

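/*
 * Estimate the counter frequency from the CPU clock; when PMCR.D is
 * set, the cycle counter ticks once every 64 CPU cycles.
 */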
static uint64_t
armv7_pmu_counter_estimate_freq(u_int counter)
{
	uint64_t cpufreq = curcpu()->ci_data.cpu_cc_freq;

	if (ISSET(armreg_pmcr_read(), PMCR_D))
		cpufreq /= 64;
	return cpufreq;
}

static int
armv7_pmu_valid_event(u_int counter, const tprof_param_t *param)
{
	if (!armv7_pmu_event_implemented(param->p_event)) {
		printf("%s: event %#" PRIx64 " not implemented on this CPU\n",
		    __func__, param->p_event);
		return EINVAL;
	}
	return 0;
}

static void
armv7_pmu_configure_event(u_int counter, const tprof_param_t *param)
{
	/* Disable the event counter */
	armreg_pmcntenclr_write(__BIT(counter) & PMCNTEN_P);

	/* Disable overflow interrupts */
	armreg_pmintenclr_write(__BIT(counter) & PMINTEN_P);

	/* Configure the event counter */
	uint32_t pmevtyper = __SHIFTIN(param->p_event, PMEVTYPER_EVTCOUNT);
	if (!ISSET(param->p_flags, TPROF_PARAM_USER))
		pmevtyper |= PMEVTYPER_U;
	if (!ISSET(param->p_flags, TPROF_PARAM_KERN))
		pmevtyper |= PMEVTYPER_P;
	armv7_pmu_set_pmevtyper(counter, pmevtyper);

	/*
	 * Enable overflow interrupts.
	 * Whether or not the counter is used for profiling, ARMv7 event
	 * counters are only 32 bits wide, so overflow handling is
	 * required either way.
	 */
	armreg_pmintenset_write(__BIT(counter) & PMINTEN_P);

	/* Clear the overflow flag */
	armreg_pmovsr_write(__BIT(counter) & PMOVS_P);

	/* Reset the counter */
	armv7_pmu_set_pmevcntr(counter, param->p_value);
}

static void
armv7_pmu_start(tprof_countermask_t runmask)
{
	/* Enable event counters */
	armreg_pmcntenset_write(runmask & PMCNTEN_P);

	/*
	 * PMCR.E is shared by PMCCNTR and the event counters.
	 * It is set here in case PMCCNTR is not used in the system.
	 */
	armreg_pmcr_write(armreg_pmcr_read() | PMCR_E);
}

static void
armv7_pmu_stop(tprof_countermask_t stopmask)
{
	/* Disable event counters */
	armreg_pmcntenclr_write(stopmask & PMCNTEN_P);
}

/* XXX: argument of armv7_pmu_intr() */
extern struct tprof_backend *tprof_backend;
static void *pmu_intr_arg;

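/*
 * Overflow interrupt handler.  For each counter flagged in PMOVSR:
 * if the counter is used for profiling, record a sample and reload
 * the reset value; otherwise credit 2^32 to the per-CPU offset so
 * the 32-bit hardware counter reads as a 64-bit value in software.
 */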
int
armv7_pmu_intr(void *priv)
{
	const struct trapframe * const tf = priv;
	tprof_backend_softc_t *sc = pmu_intr_arg;
	tprof_frame_info_t tfi;
	int bit;
	const uint32_t pmovs = armreg_pmovsr_read();

	uint64_t *counters_offset =
	    percpu_getptr_remote(sc->sc_ctr_offset_percpu, curcpu());
	uint32_t mask = pmovs;
	while ((bit = ffs(mask)) != 0) {
		bit--;
		CLR(mask, __BIT(bit));

		if (ISSET(sc->sc_ctr_prof_mask, __BIT(bit))) {
			/* account for the counter, and reset */
			uint64_t ctr = armv7_pmu_getset_pmevcntr(bit,
			    sc->sc_count[bit].ctr_counter_reset_val);
			counters_offset[bit] +=
			    sc->sc_count[bit].ctr_counter_val + ctr;

			/* record a sample */
			tfi.tfi_pc = tf->tf_pc;
			tfi.tfi_counter = bit;
			tfi.tfi_inkernel =
			    tfi.tfi_pc >= VM_MIN_KERNEL_ADDRESS &&
			    tfi.tfi_pc < VM_MAX_KERNEL_ADDRESS;
			tprof_sample(NULL, &tfi);
		} else if (ISSET(sc->sc_ctr_ovf_mask, __BIT(bit))) {
			/* counter has overflowed */
			counters_offset[bit] += __BIT(32);
		}
	}
	armreg_pmovsr_write(pmovs);

	return 1;
}

static uint32_t
armv7_pmu_ident(void)
{
	return TPROF_IDENT_ARMV7_GENERIC;
}

static const tprof_backend_ops_t tprof_armv7_pmu_ops = {
	.tbo_ident = armv7_pmu_ident,
	.tbo_ncounters = armv7_pmu_ncounters,
	.tbo_counter_bitwidth = armv7_pmu_counter_bitwidth,
	.tbo_counter_read = armv7_pmu_get_pmevcntr,
	.tbo_counter_estimate_freq = armv7_pmu_counter_estimate_freq,
	.tbo_valid_event = armv7_pmu_valid_event,
	.tbo_configure_event = armv7_pmu_configure_event,
	.tbo_start = armv7_pmu_start,
	.tbo_stop = armv7_pmu_stop,
	.tbo_establish = NULL,
	.tbo_disestablish = NULL,
};

static void
armv7_pmu_init_cpu(void *arg1, void *arg2)
{
	/* Disable user mode access to performance monitors */
	armreg_pmuserenr_write(0);

	/* Disable interrupts */
	armreg_pmintenclr_write(PMINTEN_P);

	/* Disable counters */
	armreg_pmcntenclr_write(PMCNTEN_P);
}

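/*
 * Attach the backend: make sure the PMU implements at least one event
 * counter, bring every CPU's PMU to a known state via a broadcast
 * cross-call, and register with the tprof framework.
 */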
int
armv7_pmu_init(void)
{
	int error, ncounters;

	ncounters = armv7_pmu_ncounters();
	if (ncounters == 0)
		return ENOTSUP;

	uint64_t xc = xc_broadcast(0, armv7_pmu_init_cpu, NULL, NULL);
	xc_wait(xc);

	error = tprof_backend_register("tprof_armv7", &tprof_armv7_pmu_ops,
	    TPROF_BACKEND_VERSION);
	if (error == 0) {
		/* XXX: for argument of armv7_pmu_intr() */
		pmu_intr_arg = tprof_backend;
	}

	return error;
}