subr_pserialize.c revision 1.10 1 1.10 msaitoh /* $NetBSD: subr_pserialize.c,v 1.10 2017/12/28 03:39:48 msaitoh Exp $ */
2 1.1 christos
3 1.1 christos /*-
4 1.1 christos * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc.
5 1.1 christos * All rights reserved.
6 1.1 christos *
7 1.1 christos * Redistribution and use in source and binary forms, with or without
8 1.1 christos * modification, are permitted provided that the following conditions
9 1.1 christos * are met:
10 1.1 christos * 1. Redistributions of source code must retain the above copyright
11 1.1 christos * notice, this list of conditions and the following disclaimer.
12 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 christos * notice, this list of conditions and the following disclaimer in the
14 1.1 christos * documentation and/or other materials provided with the distribution.
15 1.1 christos *
16 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 1.1 christos * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 1.1 christos * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 1.1 christos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 1.1 christos * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 1.1 christos * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 1.1 christos * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 1.1 christos * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 1.1 christos * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 1.1 christos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 1.1 christos * POSSIBILITY OF SUCH DAMAGE.
27 1.1 christos */
28 1.1 christos
29 1.1 christos /*
30 1.1 christos * Passive serialization.
31 1.1 christos *
32 1.1 christos * Implementation accurately matches the lapsed US patent 4809168, therefore
33 1.1 christos * code is patent-free in the United States. Your use of this code is at
34 1.1 christos * your own risk.
35 1.1 christos *
36 1.1 christos * Note for NetBSD developers: all changes to this source file must be
37 1.1 christos * approved by the <core>.
38 1.1 christos */
39 1.1 christos
40 1.1 christos #include <sys/cdefs.h>
41 1.10 msaitoh __KERNEL_RCSID(0, "$NetBSD: subr_pserialize.c,v 1.10 2017/12/28 03:39:48 msaitoh Exp $");
42 1.1 christos
43 1.1 christos #include <sys/param.h>
44 1.1 christos
45 1.1 christos #include <sys/condvar.h>
46 1.1 christos #include <sys/cpu.h>
47 1.2 he #include <sys/evcnt.h>
48 1.1 christos #include <sys/kmem.h>
49 1.1 christos #include <sys/mutex.h>
50 1.1 christos #include <sys/pserialize.h>
51 1.7 rmind #include <sys/proc.h>
52 1.1 christos #include <sys/queue.h>
53 1.1 christos #include <sys/xcall.h>
54 1.1 christos
55 1.1 christos struct pserialize {
56 1.1 christos TAILQ_ENTRY(pserialize) psz_chain;
57 1.1 christos lwp_t * psz_owner;
58 1.1 christos kcpuset_t * psz_target;
59 1.1 christos kcpuset_t * psz_pass;
60 1.1 christos };
61 1.1 christos
62 1.1 christos static u_int psz_work_todo __cacheline_aligned;
63 1.1 christos static kmutex_t psz_lock __cacheline_aligned;
64 1.1 christos static struct evcnt psz_ev_excl __cacheline_aligned;
65 1.1 christos
66 1.1 christos /*
67 1.1 christos * As defined in "Method 1":
68 1.1 christos * q0: "0 MP checkpoints have occured".
69 1.1 christos * q1: "1 MP checkpoint has occured".
70 1.1 christos * q2: "2 MP checkpoints have occured".
71 1.1 christos */
72 1.1 christos static TAILQ_HEAD(, pserialize) psz_queue0 __cacheline_aligned;
73 1.1 christos static TAILQ_HEAD(, pserialize) psz_queue1 __cacheline_aligned;
74 1.1 christos static TAILQ_HEAD(, pserialize) psz_queue2 __cacheline_aligned;
75 1.1 christos
76 1.9 ozaki #ifdef LOCKDEBUG
77 1.9 ozaki #include <sys/percpu.h>
78 1.9 ozaki
79 1.9 ozaki static percpu_t *psz_debug_nreads __cacheline_aligned;
80 1.9 ozaki #endif
81 1.9 ozaki
82 1.1 christos /*
83 1.1 christos * pserialize_init:
84 1.1 christos *
85 1.1 christos * Initialize passive serialization structures.
86 1.1 christos */
87 1.1 christos void
88 1.1 christos pserialize_init(void)
89 1.1 christos {
90 1.1 christos
91 1.1 christos psz_work_todo = 0;
92 1.1 christos TAILQ_INIT(&psz_queue0);
93 1.1 christos TAILQ_INIT(&psz_queue1);
94 1.1 christos TAILQ_INIT(&psz_queue2);
95 1.1 christos mutex_init(&psz_lock, MUTEX_DEFAULT, IPL_SCHED);
96 1.1 christos evcnt_attach_dynamic(&psz_ev_excl, EVCNT_TYPE_MISC, NULL,
97 1.1 christos "pserialize", "exclusive access");
98 1.9 ozaki #ifdef LOCKDEBUG
99 1.9 ozaki psz_debug_nreads = percpu_alloc(sizeof(uint32_t));
100 1.9 ozaki #endif
101 1.1 christos }
102 1.1 christos
103 1.1 christos /*
104 1.1 christos * pserialize_create:
105 1.1 christos *
106 1.1 christos * Create and initialize a passive serialization object.
107 1.1 christos */
108 1.1 christos pserialize_t
109 1.1 christos pserialize_create(void)
110 1.1 christos {
111 1.1 christos pserialize_t psz;
112 1.1 christos
113 1.1 christos psz = kmem_zalloc(sizeof(struct pserialize), KM_SLEEP);
114 1.4 rmind kcpuset_create(&psz->psz_target, true);
115 1.4 rmind kcpuset_create(&psz->psz_pass, true);
116 1.1 christos psz->psz_owner = NULL;
117 1.1 christos
118 1.1 christos return psz;
119 1.1 christos }
120 1.1 christos
121 1.1 christos /*
122 1.1 christos * pserialize_destroy:
123 1.1 christos *
124 1.1 christos * Destroy a passive serialization object.
125 1.1 christos */
126 1.1 christos void
127 1.1 christos pserialize_destroy(pserialize_t psz)
128 1.1 christos {
129 1.1 christos
130 1.1 christos KASSERT(psz->psz_owner == NULL);
131 1.1 christos
132 1.1 christos kcpuset_destroy(psz->psz_target);
133 1.1 christos kcpuset_destroy(psz->psz_pass);
134 1.1 christos kmem_free(psz, sizeof(struct pserialize));
135 1.1 christos }
136 1.1 christos
137 1.1 christos /*
138 1.1 christos * pserialize_perform:
139 1.1 christos *
140 1.1 christos * Perform the write side of passive serialization. The calling
141 1.1 christos * thread holds an exclusive lock on the data object(s) being updated.
142 1.1 christos * We wait until every processor in the system has made at least two
143 1.8 dholland * passes through cpu_switchto(). The wait is made with the caller's
144 1.1 christos * update lock held, but is short term.
145 1.1 christos */
146 1.1 christos void
147 1.1 christos pserialize_perform(pserialize_t psz)
148 1.1 christos {
149 1.6 rmind uint64_t xc;
150 1.1 christos
151 1.1 christos KASSERT(!cpu_intr_p());
152 1.1 christos KASSERT(!cpu_softintr_p());
153 1.1 christos
154 1.1 christos if (__predict_false(panicstr != NULL)) {
155 1.1 christos return;
156 1.1 christos }
157 1.1 christos KASSERT(psz->psz_owner == NULL);
158 1.1 christos KASSERT(ncpu > 0);
159 1.1 christos
160 1.10 msaitoh if (__predict_false(mp_online == false)) {
161 1.10 msaitoh psz_ev_excl.ev_count++;
162 1.10 msaitoh return;
163 1.10 msaitoh }
164 1.10 msaitoh
165 1.1 christos /*
166 1.1 christos * Set up the object and put it onto the queue. The lock
167 1.1 christos * activity here provides the necessary memory barrier to
168 1.1 christos * make the caller's data update completely visible to
169 1.1 christos * other processors.
170 1.1 christos */
171 1.1 christos psz->psz_owner = curlwp;
172 1.5 rmind kcpuset_copy(psz->psz_target, kcpuset_running);
173 1.1 christos kcpuset_zero(psz->psz_pass);
174 1.1 christos
175 1.1 christos mutex_spin_enter(&psz_lock);
176 1.1 christos TAILQ_INSERT_TAIL(&psz_queue0, psz, psz_chain);
177 1.1 christos psz_work_todo++;
178 1.1 christos
179 1.7 rmind do {
180 1.7 rmind mutex_spin_exit(&psz_lock);
181 1.7 rmind
182 1.7 rmind /*
183 1.7 rmind * Force some context switch activity on every CPU, as
184 1.7 rmind * the system may not be busy. Pause to not flood.
185 1.7 rmind */
186 1.7 rmind xc = xc_broadcast(XC_HIGHPRI, (xcfunc_t)nullop, NULL, NULL);
187 1.7 rmind xc_wait(xc);
188 1.7 rmind kpause("psrlz", false, 1, NULL);
189 1.6 rmind
190 1.7 rmind mutex_spin_enter(&psz_lock);
191 1.7 rmind } while (!kcpuset_iszero(psz->psz_target));
192 1.1 christos
193 1.1 christos psz_ev_excl.ev_count++;
194 1.1 christos mutex_spin_exit(&psz_lock);
195 1.1 christos
196 1.1 christos psz->psz_owner = NULL;
197 1.1 christos }
198 1.1 christos
/*
 * pserialize_read_enter:
 *
 *	Enter a read section.  Raises the interrupt priority level with
 *	splsoftserial() and returns the previous level, which the caller
 *	must pass to pserialize_read_exit().  Must not be called from hard
 *	interrupt context.  With LOCKDEBUG the per-CPU count of open read
 *	sections is incremented as well.
 */
int
pserialize_read_enter(void)
{
	int saved;

	KASSERT(!cpu_intr_p());
	saved = splsoftserial();
#ifdef LOCKDEBUG
	{
		uint32_t *depth = percpu_getref(psz_debug_nreads);

		/* A wrap to zero means the counter has overflowed. */
		if (++(*depth) == 0)
			panic("nreads overflow");
		percpu_putref(psz_debug_nreads);
	}
#endif
	return saved;
}
218 1.1 christos
/*
 * pserialize_read_exit:
 *
 *	Leave a read section.  `s' is the value returned by the matching
 *	pserialize_read_enter() and is handed to splx().  With LOCKDEBUG
 *	the per-CPU count of open read sections is decremented, and an
 *	exit without a matching enter panics.
 */
void
pserialize_read_exit(int s)
{

#ifdef LOCKDEBUG
	{
		uint32_t *nreads;

		nreads = percpu_getref(psz_debug_nreads);
		/*
		 * Detect the unbalanced exit before decrementing, so the
		 * check does not depend on the 32-bit counter wrapping to
		 * a value that happens to equal UINT_MAX.
		 */
		if (*nreads == 0)
			panic("nreads underflow");
		(*nreads)--;
		percpu_putref(psz_debug_nreads);
	}
#endif
	splx(s);
}
235 1.1 christos
236 1.1 christos /*
237 1.1 christos * pserialize_switchpoint:
238 1.1 christos *
239 1.1 christos * Monitor system context switch activity. Called from machine
240 1.1 christos * independent code after mi_switch() returns.
241 1.1 christos */
242 1.1 christos void
243 1.1 christos pserialize_switchpoint(void)
244 1.1 christos {
245 1.1 christos pserialize_t psz, next;
246 1.1 christos cpuid_t cid;
247 1.1 christos
248 1.9 ozaki /* We must to ensure not to come here from inside a read section. */
249 1.9 ozaki KASSERT(pserialize_not_in_read_section());
250 1.9 ozaki
251 1.1 christos /*
252 1.1 christos * If no updates pending, bail out. No need to lock in order to
253 1.1 christos * test psz_work_todo; the only ill effect of missing an update
254 1.1 christos * would be to delay LWPs waiting in pserialize_perform(). That
255 1.1 christos * will not happen because updates are on the queue before an
256 1.1 christos * xcall is generated (serialization) to tickle every CPU.
257 1.1 christos */
258 1.1 christos if (__predict_true(psz_work_todo == 0)) {
259 1.1 christos return;
260 1.1 christos }
261 1.1 christos mutex_spin_enter(&psz_lock);
262 1.1 christos cid = cpu_index(curcpu());
263 1.1 christos
264 1.1 christos /*
265 1.1 christos * At first, scan through the second queue and update each request,
266 1.1 christos * if passed all processors, then transfer to the third queue.
267 1.1 christos */
268 1.1 christos for (psz = TAILQ_FIRST(&psz_queue1); psz != NULL; psz = next) {
269 1.1 christos next = TAILQ_NEXT(psz, psz_chain);
270 1.7 rmind kcpuset_set(psz->psz_pass, cid);
271 1.1 christos if (!kcpuset_match(psz->psz_pass, psz->psz_target)) {
272 1.1 christos continue;
273 1.1 christos }
274 1.1 christos kcpuset_zero(psz->psz_pass);
275 1.1 christos TAILQ_REMOVE(&psz_queue1, psz, psz_chain);
276 1.1 christos TAILQ_INSERT_TAIL(&psz_queue2, psz, psz_chain);
277 1.1 christos }
278 1.1 christos /*
279 1.1 christos * Scan through the first queue and update each request,
280 1.1 christos * if passed all processors, then move to the second queue.
281 1.1 christos */
282 1.1 christos for (psz = TAILQ_FIRST(&psz_queue0); psz != NULL; psz = next) {
283 1.1 christos next = TAILQ_NEXT(psz, psz_chain);
284 1.7 rmind kcpuset_set(psz->psz_pass, cid);
285 1.1 christos if (!kcpuset_match(psz->psz_pass, psz->psz_target)) {
286 1.1 christos continue;
287 1.1 christos }
288 1.1 christos kcpuset_zero(psz->psz_pass);
289 1.1 christos TAILQ_REMOVE(&psz_queue0, psz, psz_chain);
290 1.1 christos TAILQ_INSERT_TAIL(&psz_queue1, psz, psz_chain);
291 1.1 christos }
292 1.1 christos /*
293 1.1 christos * Process the third queue: entries have been seen twice on every
294 1.1 christos * processor, remove from the queue and notify the updating thread.
295 1.1 christos */
296 1.1 christos while ((psz = TAILQ_FIRST(&psz_queue2)) != NULL) {
297 1.1 christos TAILQ_REMOVE(&psz_queue2, psz, psz_chain);
298 1.1 christos kcpuset_zero(psz->psz_target);
299 1.1 christos psz_work_todo--;
300 1.1 christos }
301 1.1 christos mutex_spin_exit(&psz_lock);
302 1.1 christos }
303 1.9 ozaki
/*
 * pserialize_in_read_section:
 *
 *	True if the caller is in a pserialize read section.  Meant only
 *	for diagnostic assertions such as:
 *
 *		KASSERT(pserialize_in_read_section());
 *
 *	The answer is only meaningful with LOCKDEBUG; without it, and
 *	before the debug counter has been allocated, the function always
 *	returns true so that such assertions never fire.
 */
bool
pserialize_in_read_section(void)
{
#ifdef LOCKDEBUG
	uint32_t *depth;
	bool inside;

	if (__predict_false(psz_debug_nreads == NULL)) {
		/* Not initialized yet */
		return true;
	}

	depth = percpu_getref(psz_debug_nreads);
	inside = (*depth != 0);
	percpu_putref(psz_debug_nreads);

	return inside;
#else
	return true;
#endif
}
332 1.9 ozaki
/*
 * pserialize_not_in_read_section:
 *
 *	True if the caller is not in a pserialize read section.  Meant
 *	only for diagnostic assertions such as:
 *
 *		KASSERT(pserialize_not_in_read_section());
 *
 *	The answer is only meaningful with LOCKDEBUG; without it, and
 *	before the debug counter has been allocated, the function always
 *	returns true so that such assertions never fire.
 */
bool
pserialize_not_in_read_section(void)
{
#ifdef LOCKDEBUG
	uint32_t *depth;
	bool outside;

	if (__predict_false(psz_debug_nreads == NULL)) {
		/* Not initialized yet */
		return true;
	}

	depth = percpu_getref(psz_debug_nreads);
	outside = (*depth == 0);
	percpu_putref(psz_debug_nreads);

	return outside;
#else
	return true;
#endif
}
361