/*	$NetBSD: subr_pserialize.c,v 1.11.2.1 2019/06/10 22:09:03 christos Exp $	*/

/*-
 * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Passive serialization.
 *
 * Implementation accurately matches the lapsed US patent 4809168, therefore
 * code is patent-free in the United States.  Your use of this code is at
 * your own risk.
 *
 * Note for NetBSD developers: all changes to this source file must be
 * approved by the <core>.
 */
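
/*
 * A minimal read-side usage sketch.  The "frobs" list, its element type
 * and frob_use() are hypothetical; only the pserialize calls are part of
 * this interface.  The read section must not block or sleep:
 *
 *	int s;
 *
 *	s = pserialize_read_enter();
 *	TAILQ_FOREACH(f, &frobs, f_entry) {
 *		if (f->f_key == key) {
 *			frob_use(f);
 *			break;
 *		}
 *	}
 *	pserialize_read_exit(s);
 */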

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_pserialize.c,v 1.11.2.1 2019/06/10 22:09:03 christos Exp $");

#include <sys/param.h>

#include <sys/condvar.h>
#include <sys/cpu.h>
#include <sys/evcnt.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/pserialize.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/xcall.h>

struct pserialize {
	TAILQ_ENTRY(pserialize)	psz_chain;
	lwp_t *			psz_owner;
	kcpuset_t *		psz_target;
	kcpuset_t *		psz_pass;
};

static u_int		psz_work_todo	__cacheline_aligned;
static kmutex_t		psz_lock	__cacheline_aligned;
static struct evcnt	psz_ev_excl	__cacheline_aligned;

/*
 * As defined in "Method 1":
 *	q0: "0 MP checkpoints have occurred".
 *	q1: "1 MP checkpoint has occurred".
 *	q2: "2 MP checkpoints have occurred".
 */
static TAILQ_HEAD(, pserialize)	psz_queue0	__cacheline_aligned;
static TAILQ_HEAD(, pserialize)	psz_queue1	__cacheline_aligned;
static TAILQ_HEAD(, pserialize)	psz_queue2	__cacheline_aligned;

#ifdef LOCKDEBUG
#include <sys/percpu.h>

static percpu_t		*psz_debug_nreads	__cacheline_aligned;
#endif

/*
 * pserialize_init:
 *
 *	Initialize passive serialization structures.
 */
void
pserialize_init(void)
{

	psz_work_todo = 0;
	TAILQ_INIT(&psz_queue0);
	TAILQ_INIT(&psz_queue1);
	TAILQ_INIT(&psz_queue2);
	mutex_init(&psz_lock, MUTEX_DEFAULT, IPL_SCHED);
	evcnt_attach_dynamic(&psz_ev_excl, EVCNT_TYPE_MISC, NULL,
	    "pserialize", "exclusive access");
#ifdef LOCKDEBUG
	psz_debug_nreads = percpu_alloc(sizeof(uint32_t));
#endif
}

/*
 * pserialize_create:
 *
 *	Create and initialize a passive serialization object.
 */
pserialize_t
pserialize_create(void)
{
	pserialize_t psz;

	psz = kmem_zalloc(sizeof(struct pserialize), KM_SLEEP);
	kcpuset_create(&psz->psz_target, true);
	kcpuset_create(&psz->psz_pass, true);
	psz->psz_owner = NULL;

	return psz;
}

/*
 * pserialize_destroy:
 *
 *	Destroy a passive serialization object.
 */
void
pserialize_destroy(pserialize_t psz)
{

	KASSERT(psz->psz_owner == NULL);

	kcpuset_destroy(psz->psz_target);
	kcpuset_destroy(psz->psz_pass);
	kmem_free(psz, sizeof(struct pserialize));
}

/*
 * pserialize_perform:
 *
 *	Perform the write side of passive serialization.  The calling
 *	thread holds an exclusive lock on the data object(s) being updated.
 *	We wait until every processor in the system has made at least two
 *	passes through cpu_switchto().  The wait is made with the caller's
 *	update lock held, but is short term.
 */
void
pserialize_perform(pserialize_t psz)
{
	int n;
	uint64_t xc;

	KASSERT(!cpu_intr_p());
	KASSERT(!cpu_softintr_p());

	if (__predict_false(panicstr != NULL)) {
		return;
	}
	KASSERT(psz->psz_owner == NULL);
	KASSERT(ncpu > 0);

	if (__predict_false(mp_online == false)) {
		psz_ev_excl.ev_count++;
		return;
	}

	/*
	 * Set up the object and put it onto the queue.  The lock
	 * activity here provides the necessary memory barrier to
	 * make the caller's data update completely visible to
	 * other processors.
	 */
	psz->psz_owner = curlwp;
	kcpuset_copy(psz->psz_target, kcpuset_running);
	kcpuset_zero(psz->psz_pass);

	mutex_spin_enter(&psz_lock);
	TAILQ_INSERT_TAIL(&psz_queue0, psz, psz_chain);
	psz_work_todo++;

	n = 0;
	do {
		mutex_spin_exit(&psz_lock);

		/*
		 * Force some context switch activity on every CPU, as
		 * the system may not be busy.  Pause briefly to avoid flooding.
		 */
		if (n++ > 1)
			kpause("psrlz", false, 1, NULL);
		xc = xc_broadcast(XC_HIGHPRI, (xcfunc_t)nullop, NULL, NULL);
		xc_wait(xc);

		mutex_spin_enter(&psz_lock);
	} while (!kcpuset_iszero(psz->psz_target));

	psz_ev_excl.ev_count++;
	mutex_spin_exit(&psz_lock);

	psz->psz_owner = NULL;
}
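
/*
 * A minimal update-side sketch of the protocol described above, assuming
 * psz was obtained earlier with pserialize_create().  The "frobs" list,
 * frob_lock (an adaptive mutex) and frob_destroy() are hypothetical:
 *
 *	mutex_enter(&frob_lock);
 *	TAILQ_REMOVE(&frobs, f, f_entry);
 *	pserialize_perform(psz);
 *	mutex_exit(&frob_lock);
 *
 *	frob_destroy(f);
 *
 * Once pserialize_perform() returns, any reader that could have seen the
 * unlinked element has left its read section, so it is safe to free it.
 */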

int
pserialize_read_enter(void)
{
	int s;

	KASSERT(!cpu_intr_p());
	s = splsoftserial();
#ifdef LOCKDEBUG
	{
		uint32_t *nreads;
		nreads = percpu_getref(psz_debug_nreads);
		(*nreads)++;
		if (*nreads == 0)
			panic("nreads overflow");
		percpu_putref(psz_debug_nreads);
	}
#endif
	return s;
}

void
pserialize_read_exit(int s)
{

#ifdef LOCKDEBUG
	{
		uint32_t *nreads;
		nreads = percpu_getref(psz_debug_nreads);
		(*nreads)--;
		if (*nreads == UINT_MAX)
			panic("nreads underflow");
		percpu_putref(psz_debug_nreads);
	}
#endif
	splx(s);
}

/*
 * pserialize_switchpoint:
 *
 *	Monitor system context switch activity.  Called from machine
 *	independent code after mi_switch() returns.
 */
void
pserialize_switchpoint(void)
{
	pserialize_t psz, next;
	cpuid_t cid;

	/*
	 * If no updates are pending, bail out.  No need to lock in order to
	 * test psz_work_todo; the only ill effect of missing an update
	 * would be to delay LWPs waiting in pserialize_perform().  That
	 * will not happen because updates are on the queue before an
	 * xcall is generated (serialization) to tickle every CPU.
	 */
	if (__predict_true(psz_work_todo == 0)) {
		return;
	}
	mutex_spin_enter(&psz_lock);
	cid = cpu_index(curcpu());

	/*
	 * First, scan through the second queue and update each request;
	 * if it has passed all processors, transfer it to the third queue.
	 */
	for (psz = TAILQ_FIRST(&psz_queue1); psz != NULL; psz = next) {
		next = TAILQ_NEXT(psz, psz_chain);
		kcpuset_set(psz->psz_pass, cid);
		if (!kcpuset_match(psz->psz_pass, psz->psz_target)) {
			continue;
		}
		kcpuset_zero(psz->psz_pass);
		TAILQ_REMOVE(&psz_queue1, psz, psz_chain);
		TAILQ_INSERT_TAIL(&psz_queue2, psz, psz_chain);
	}
	/*
	 * Scan through the first queue and update each request;
	 * if it has passed all processors, move it to the second queue.
	 */
	for (psz = TAILQ_FIRST(&psz_queue0); psz != NULL; psz = next) {
		next = TAILQ_NEXT(psz, psz_chain);
		kcpuset_set(psz->psz_pass, cid);
		if (!kcpuset_match(psz->psz_pass, psz->psz_target)) {
			continue;
		}
		kcpuset_zero(psz->psz_pass);
		TAILQ_REMOVE(&psz_queue0, psz, psz_chain);
		TAILQ_INSERT_TAIL(&psz_queue1, psz, psz_chain);
	}
	/*
	 * Process the third queue: its entries have been seen twice on
	 * every processor; remove them from the queue and notify the
	 * updating thread.
	 */
	while ((psz = TAILQ_FIRST(&psz_queue2)) != NULL) {
		TAILQ_REMOVE(&psz_queue2, psz, psz_chain);
		kcpuset_zero(psz->psz_target);
		psz_work_todo--;
	}
	mutex_spin_exit(&psz_lock);
}

/*
 * pserialize_in_read_section:
 *
 *	True if the caller is in a pserialize read section.  To be used only
 *	for diagnostic assertions where we want to guarantee a condition like:
 *
 *		KASSERT(pserialize_in_read_section());
 */
bool
pserialize_in_read_section(void)
{
#ifdef LOCKDEBUG
	uint32_t *nreads;
	bool in;

	/* Not initialized yet */
	if (__predict_false(psz_debug_nreads == NULL))
		return true;

	nreads = percpu_getref(psz_debug_nreads);
	in = *nreads != 0;
	percpu_putref(psz_debug_nreads);

	return in;
#else
	return true;
#endif
}

/*
 * pserialize_not_in_read_section:
 *
 *	True if the caller is not in a pserialize read section.  To be used only
 *	for diagnostic assertions where we want to guarantee a condition like:
 *
 *		KASSERT(pserialize_not_in_read_section());
 */
bool
pserialize_not_in_read_section(void)
{
#ifdef LOCKDEBUG
	uint32_t *nreads;
	bool notin;

	/* Not initialized yet */
	if (__predict_false(psz_debug_nreads == NULL))
		return true;

	nreads = percpu_getref(psz_debug_nreads);
	notin = *nreads == 0;
	percpu_putref(psz_debug_nreads);

	return notin;
#else
	return true;
#endif
}