kern_cctr.c revision 1.12 1 1.12 thorpej /* $NetBSD: kern_cctr.c,v 1.12 2020/10/10 18:18:04 thorpej Exp $ */
2 1.1 tsutsui
3 1.1 tsutsui /*-
4 1.11 thorpej * Copyright (c) 2020 Jason R. Thorpe
5 1.11 thorpej * Copyright (c) 2018 Naruaki Etomi
6 1.1 tsutsui * All rights reserved.
7 1.1 tsutsui *
8 1.1 tsutsui * Redistribution and use in source and binary forms, with or without
9 1.1 tsutsui * modification, are permitted provided that the following conditions
10 1.1 tsutsui * are met:
11 1.1 tsutsui * 1. Redistributions of source code must retain the above copyright
12 1.1 tsutsui * notice, this list of conditions and the following disclaimer.
13 1.1 tsutsui * 2. Redistributions in binary form must reproduce the above copyright
14 1.1 tsutsui * notice, this list of conditions and the following disclaimer in the
15 1.1 tsutsui * documentation and/or other materials provided with the distribution.
16 1.1 tsutsui *
17 1.11 thorpej * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 1.11 thorpej * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 1.11 thorpej * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 1.11 thorpej * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 1.11 thorpej * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 1.11 thorpej * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 1.11 thorpej * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 1.11 thorpej * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 1.11 thorpej * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 1.11 thorpej * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 1.1 tsutsui */
28 1.1 tsutsui
29 1.11 thorpej /*
30 1.11 thorpej * Most of the following was adapted from the Linux/ia64 cycle counter
31 1.11 thorpej * synchronization algorithm:
32 1.11 thorpej *
33 1.11 thorpej * IA-64 Linux Kernel: Design and Implementation p356-p361
34 1.11 thorpej * (Hewlett-Packard Professional Books)
35 1.11 thorpej *
36 1.11 thorpej * Here's a rough description of how it works.
37 1.11 thorpej *
38 1.11 thorpej * The primary CPU is the reference monotonic counter. Each secondary
39 1.11 thorpej * CPU is responsible for knowing the offset of its own cycle counter
40 1.11 thorpej * relative to the primary's. When the time counter is read, the CC
41 1.11 thorpej * value is adjusted by this delta.
42 1.11 thorpej *
43 1.11 thorpej * Calibration happens periodically, and works like this:
44 1.11 thorpej *
45 1.11 thorpej * Secondary CPU                               Primary CPU
46 1.11 thorpej *   Send IPI to publish reference CC
47 1.11 thorpej *                                   --------->
48 1.12 thorpej *                                             Indicate Primary Ready
49 1.12 thorpej *                 <----------------------------
50 1.12 thorpej *   T0 = local CC
51 1.12 thorpej *   Indicate Secondary Ready
52 1.12 thorpej *                            ----------------->
53 1.11 thorpej *     (assume this happens at Tavg)           Publish reference CC
54 1.12 thorpej *                                             Indicate completion
55 1.12 thorpej *                     <------------------------
56 1.12 thorpej *   Notice completion
57 1.11 thorpej *   T1 = local CC
58 1.11 thorpej *
59 1.11 thorpej * Tavg = (T0 + T1) / 2
60 1.1 tsutsui *
61 1.11 thorpej * Delta = Tavg - Published primary CC value
62 1.1 tsutsui *
63 1.12 thorpej * "Notice completion" is performed by waiting for the primary to set
64 1.12 thorpej * the calibration state to FINISHED. This is a little unfortunate,
65 1.12 thorpej * because T0->Tavg involves a single store-release on the secondary, and
66 1.12 thorpej * Tavg->T1 involves a store-relaxed and a store-release. It would be
67 1.12 thorpej * better to simply wait for the reference CC to transition from 0 to
68 1.12 thorpej * non-0 (i.e. just wait for a single store-release from Tavg->T1), but
69 1.12 thorpej * if the cycle counter just happened to read back as 0 at that instant,
70 1.12 thorpej * we would never break out of the loop.
71 1.12 thorpej *
72 1.11 thorpej * We trigger calibration roughly once a second; the period is actually
73 1.11 thorpej * skewed based on the CPU index in order to avoid lock contention. The
74 1.11 thorpej * calibration interval does not need to be precise, and so this is fine.
75 1.1 tsutsui */
76 1.1 tsutsui
77 1.1 tsutsui #include <sys/cdefs.h>
78 1.12 thorpej __KERNEL_RCSID(0, "$NetBSD: kern_cctr.c,v 1.12 2020/10/10 18:18:04 thorpej Exp $");
79 1.1 tsutsui
80 1.1 tsutsui #include <sys/param.h>
81 1.11 thorpej #include <sys/atomic.h>
82 1.1 tsutsui #include <sys/systm.h>
83 1.1 tsutsui #include <sys/sysctl.h>
84 1.11 thorpej #include <sys/timepps.h>
85 1.1 tsutsui #include <sys/time.h>
86 1.1 tsutsui #include <sys/timetc.h>
87 1.1 tsutsui #include <sys/kernel.h>
88 1.1 tsutsui #include <sys/power.h>
89 1.2 ad #include <sys/cpu.h>
90 1.1 tsutsui #include <machine/cpu_counter.h>
91 1.1 tsutsui
92 1.1 tsutsui /* XXX make cc_timecounter.tc_frequency settable by sysctl() */
93 1.1 tsutsui
#if defined(MULTIPROCESSOR)
/*
 * Shared calibration state; each object is declared __cacheline_aligned.
 *
 *	cc_primary		cycle counter value stored by cc_primary_cc()
 *				and read back by cc_get_delta()
 *	cc_calibration_state	handshake word; holds one of the CC_CAL_*
 *				values below
 *	cc_calibration_lock	held by cc_calibrate_cpu() around a
 *				calibration
 */
static uint32_t cc_primary __cacheline_aligned;
static uint32_t cc_calibration_state __cacheline_aligned;
static kmutex_t cc_calibration_lock __cacheline_aligned;

/* Values stored in cc_calibration_state. */
enum {
	CC_CAL_START		= 0,	/* initial state */
	CC_CAL_PRIMARY_READY	= 1,	/* primary CPU ready to respond */
	CC_CAL_SECONDARY_READY	= 2,	/* secondary CPU ready to receive */
	CC_CAL_FINISHED		= 3	/* calibration attempt complete */
};
#endif /* MULTIPROCESSOR */
104 1.1 tsutsui
105 1.1 tsutsui static struct timecounter cc_timecounter = {
106 1.1 tsutsui .tc_get_timecount = cc_get_timecount,
107 1.11 thorpej .tc_poll_pps = NULL,
108 1.1 tsutsui .tc_counter_mask = ~0u,
109 1.1 tsutsui .tc_frequency = 0,
110 1.10 skrll .tc_name = "unknown cycle counter",
111 1.1 tsutsui /*
112 1.1 tsutsui * don't pick cycle counter automatically
113 1.1 tsutsui * if frequency changes might affect cycle counter
114 1.1 tsutsui */
115 1.1 tsutsui .tc_quality = -100000,
116 1.1 tsutsui
117 1.1 tsutsui .tc_priv = NULL,
118 1.1 tsutsui .tc_next = NULL
119 1.1 tsutsui };
120 1.1 tsutsui
121 1.1 tsutsui /*
122 1.11 thorpej * Initialize cycle counter based timecounter. This must be done on the
123 1.11 thorpej * primary CPU.
124 1.1 tsutsui */
125 1.1 tsutsui struct timecounter *
126 1.5 tsutsui cc_init(timecounter_get_t getcc, uint64_t freq, const char *name, int quality)
127 1.1 tsutsui {
128 1.11 thorpej static bool cc_init_done __diagused;
129 1.11 thorpej struct cpu_info * const ci = curcpu();
130 1.11 thorpej
131 1.11 thorpej KASSERT(!cc_init_done);
132 1.11 thorpej KASSERT(cold);
133 1.11 thorpej KASSERT(CPU_IS_PRIMARY(ci));
134 1.11 thorpej
135 1.11 thorpej #if defined(MULTIPROCESSOR)
136 1.11 thorpej mutex_init(&cc_calibration_lock, MUTEX_DEFAULT, IPL_HIGH);
137 1.11 thorpej #endif
138 1.11 thorpej
139 1.11 thorpej cc_init_done = true;
140 1.11 thorpej
141 1.11 thorpej ci->ci_cc.cc_delta = 0;
142 1.11 thorpej ci->ci_cc.cc_ticks = 0;
143 1.11 thorpej ci->ci_cc.cc_cal_ticks = 0;
144 1.1 tsutsui
145 1.5 tsutsui if (getcc != NULL)
146 1.5 tsutsui cc_timecounter.tc_get_timecount = getcc;
147 1.5 tsutsui
148 1.1 tsutsui cc_timecounter.tc_frequency = freq;
149 1.1 tsutsui cc_timecounter.tc_name = name;
150 1.1 tsutsui cc_timecounter.tc_quality = quality;
151 1.1 tsutsui tc_init(&cc_timecounter);
152 1.1 tsutsui
153 1.1 tsutsui return &cc_timecounter;
154 1.1 tsutsui }
155 1.1 tsutsui
156 1.1 tsutsui /*
157 1.11 thorpej * Initialize cycle counter timecounter calibration data on a secondary
158 1.11 thorpej * CPU. Must be called on that secondary CPU.
159 1.11 thorpej */
160 1.11 thorpej void
161 1.11 thorpej cc_init_secondary(struct cpu_info * const ci)
162 1.11 thorpej {
163 1.11 thorpej KASSERT(!CPU_IS_PRIMARY(curcpu()));
164 1.11 thorpej KASSERT(ci == curcpu());
165 1.11 thorpej
166 1.11 thorpej ci->ci_cc.cc_ticks = 0;
167 1.11 thorpej
168 1.11 thorpej /*
169 1.11 thorpej * It's not critical that calibration be performed in
170 1.11 thorpej * precise intervals, so skew when calibration is done
171 1.11 thorpej * on each secondary CPU based on it's CPU index to
172 1.11 thorpej * avoid contending on the calibration lock.
173 1.11 thorpej */
174 1.11 thorpej ci->ci_cc.cc_cal_ticks = hz - cpu_index(ci);
175 1.11 thorpej KASSERT(ci->ci_cc.cc_cal_ticks);
176 1.11 thorpej
177 1.11 thorpej cc_calibrate_cpu(ci);
178 1.11 thorpej }
179 1.11 thorpej
180 1.11 thorpej /*
181 1.1 tsutsui * pick up tick count scaled to reference tick count
182 1.1 tsutsui */
183 1.5 tsutsui u_int
184 1.1 tsutsui cc_get_timecount(struct timecounter *tc)
185 1.1 tsutsui {
186 1.11 thorpej #if defined(MULTIPROCESSOR)
187 1.11 thorpej int64_t rcc, ncsw;
188 1.1 tsutsui
189 1.6 ad retry:
190 1.6 ad ncsw = curlwp->l_ncsw;
191 1.11 thorpej
192 1.11 thorpej __insn_barrier();
193 1.11 thorpej /* N.B. the delta is always 0 on the primary. */
194 1.11 thorpej rcc = cpu_counter32() - curcpu()->ci_cc.cc_delta;
195 1.6 ad __insn_barrier();
196 1.1 tsutsui
197 1.6 ad if (ncsw != curlwp->l_ncsw) {
198 1.6 ad /* Was preempted */
199 1.6 ad goto retry;
200 1.6 ad }
201 1.1 tsutsui
202 1.1 tsutsui return rcc;
203 1.11 thorpej #else
204 1.11 thorpej return cpu_counter32();
205 1.11 thorpej #endif /* MULTIPROCESSOR */
206 1.1 tsutsui }
207 1.1 tsutsui
#if defined(MULTIPROCESSOR)
/*
 * cc_get_delta:
 *
 *	Secondary CPU's side of one calibration attempt.  Reads the local
 *	counter, signals CC_CAL_SECONDARY_READY, spins until the state
 *	becomes CC_CAL_FINISHED, then reads the local counter again.  The
 *	midpoint of the two readings minus cc_primary is stored as
 *	ci's cc_delta.
 *
 *	Returns false, leaving cc_delta untouched, if the second reading
 *	is smaller than the first (counter overflow); true otherwise.
 */
static inline bool
cc_get_delta(struct cpu_info * const ci)
{
	int64_t before, after, midpoint;

	before = cpu_counter32();

	/* Let the primary know it may publish its counter now. */
	atomic_store_release(&cc_calibration_state, CC_CAL_SECONDARY_READY);

	/* Spin until the calibration state reads FINISHED. */
	while (atomic_load_acquire(&cc_calibration_state) !=
	    CC_CAL_FINISHED) {
		continue;
	}

	after = cpu_counter32();

	if (after < before) {
		/* Overflow! */
		return false;
	}

	/*
	 * Average the two readings without forming their sum: halve
	 * each one and add back the unit lost when both are odd.
	 */
	midpoint = (before >> 1) + (after >> 1) + (before & after & 1);

	ci->ci_cc.cc_delta = midpoint - cc_primary;

	return true;
}
#endif /* MULTIPROCESSOR */
242 1.1 tsutsui
/*
 * cc_calibrate_cpu:
 *
 *	Called on secondary CPUs to calibrate their cycle counter offset
 *	relative to the primary CPU.  Asserts that ci is not the primary.
 *
 *	With cc_calibration_lock held, each attempt resets the handshake
 *	state to CC_CAL_START, calls cc_get_primary_cc() to trigger the
 *	primary, spins until the state reads CC_CAL_PRIMARY_READY and then
 *	runs cc_get_delta().  The attempt is repeated until cc_get_delta()
 *	succeeds.
 *
 *	Does nothing on non-MULTIPROCESSOR kernels.
 */
void
cc_calibrate_cpu(struct cpu_info * const ci)
{
#if defined(MULTIPROCESSOR)
	KASSERT(!CPU_IS_PRIMARY(ci));

	mutex_spin_enter(&cc_calibration_lock);

	do {
		atomic_store_release(&cc_calibration_state, CC_CAL_START);

		/* Trigger primary CPU. */
		cc_get_primary_cc();

		/* Spin until the primary announces it is ready. */
		while (atomic_load_acquire(&cc_calibration_state) !=
		    CC_CAL_PRIMARY_READY) {
			continue;
		}
	} while (!cc_get_delta(ci));

	/*
	 * The lock is initialized at IPL_HIGH and taken with
	 * mutex_spin_enter(), so release it with the matching spin
	 * variant rather than the generic mutex_exit().
	 */
	mutex_spin_exit(&cc_calibration_lock);
#endif /* MULTIPROCESSOR */
}
275 1.11 thorpej
/*
 * cc_primary_cc:
 *
 *	Primary CPU's side of one calibration attempt: signal
 *	CC_CAL_PRIMARY_READY, spin until the state reads
 *	CC_CAL_SECONDARY_READY, store the local cycle counter in
 *	cc_primary and signal CC_CAL_FINISHED.
 *
 *	Does nothing on non-MULTIPROCESSOR kernels.
 */
void
cc_primary_cc(void)
{
#if defined(MULTIPROCESSOR)
	/* N.B. We expect all interrupts to be blocked. */

	atomic_store_release(&cc_calibration_state, CC_CAL_PRIMARY_READY);

	/* Spin until the secondary has signalled that it is ready. */
	while (atomic_load_acquire(&cc_calibration_state) !=
	    CC_CAL_SECONDARY_READY) {
		continue;
	}

	/*
	 * Store the reference value first; the store-release of
	 * FINISHED that follows is what the secondary waits for before
	 * reading cc_primary.
	 */
	cc_primary = cpu_counter32();
	atomic_store_release(&cc_calibration_state, CC_CAL_FINISHED);
#endif /* MULTIPROCESSOR */
}
295