/*	$NetBSD: kern_lock.c,v 1.188 2024/01/14 11:46:05 andvar Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2020, 2023
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.188 2024/01/14 11:46:05 andvar Exp $");

#ifdef _KERNEL_OPT
#include "opt_lockdebug.h"
#endif

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/syslog.h>
#include <sys/atomic.h>
#include <sys/lwp.h>
#include <sys/pserialize.h>

#if defined(DIAGNOSTIC) && !defined(LOCKDEBUG)
#include <sys/ksyms.h>
#endif

#include <machine/lock.h>

#include <dev/lockstat.h>

#define	RETURN_ADDRESS	(uintptr_t)__builtin_return_address(0)

bool	kernel_lock_dodebug;

__cpu_simple_lock_t kernel_lock[CACHE_LINE_SIZE / sizeof(__cpu_simple_lock_t)]
    __cacheline_aligned;

void
assert_sleepable(void)
{
	const char *reason;
	long pctr;
	bool idle;

	if (__predict_false(panicstr != NULL)) {
		return;
	}

	LOCKDEBUG_BARRIER(kernel_lock, 1);

	/*
	 * Avoid disabling/re-enabling preemption here since this
	 * routine may be called in delicate situations.
	 */
	do {
		pctr = lwp_pctr();
		idle = CURCPU_IDLE_P();
	} while (__predict_false(pctr != lwp_pctr()));

	reason = NULL;
	if (__predict_false(idle) && !cold) {
		reason = "idle";
		goto panic;
	}
	if (__predict_false(cpu_intr_p())) {
		reason = "interrupt";
		goto panic;
	}
	if (__predict_false(cpu_softintr_p())) {
		reason = "softint";
		goto panic;
	}
	if (__predict_false(!pserialize_not_in_read_section())) {
		reason = "pserialize";
		goto panic;
	}
	return;

panic:	panic("%s: %s caller=%p", __func__, reason, (void *)RETURN_ADDRESS);
}
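
/*
 * Illustrative sketch (not part of this file): a typical caller asserts
 * sleepability before doing something that may block, for example a
 * KM_SLEEP allocation with kmem_alloc() from <sys/kmem.h>.  The routine
 * and structure named below are hypothetical.
 *
 *	static void
 *	frob_init(void)
 *	{
 *		struct frob_softc *sc;
 *
 *		assert_sleepable();
 *		sc = kmem_alloc(sizeof(*sc), KM_SLEEP);
 *		...
 *	}
 */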

/*
 * Functions for manipulating the kernel_lock.  We put them here
 * so that they show up in profiles.
 */

#define	_KERNEL_LOCK_ABORT(msg)						\
    LOCKDEBUG_ABORT(__func__, __LINE__, kernel_lock, &_kernel_lock_ops, msg)

#ifdef LOCKDEBUG
#define	_KERNEL_LOCK_ASSERT(cond)					\
do {									\
	if (!(cond))							\
		_KERNEL_LOCK_ABORT("assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)
#else
#define	_KERNEL_LOCK_ASSERT(cond)	/* nothing */
#endif

static void	_kernel_lock_dump(const volatile void *, lockop_printer_t);

lockops_t _kernel_lock_ops = {
	.lo_name = "Kernel lock",
	.lo_type = LOCKOPS_SPIN,
	.lo_dump = _kernel_lock_dump,
};

#ifdef LOCKDEBUG

#ifdef DDB
#include <ddb/ddb.h>
#endif

static void
kernel_lock_trace_ipi(void *cookie)
{

	printf("%s[%d %s]: hogging kernel lock\n", cpu_name(curcpu()),
	    curlwp->l_lid,
	    curlwp->l_name ? curlwp->l_name : curproc->p_comm);
#ifdef DDB
	db_stacktrace();
#endif
}

#endif

/*
 * Initialize the kernel lock.
 */
void
kernel_lock_init(void)
{

	__cpu_simple_lock_init(kernel_lock);
	kernel_lock_dodebug = LOCKDEBUG_ALLOC(kernel_lock, &_kernel_lock_ops,
	    RETURN_ADDRESS);
}
CTASSERT(CACHE_LINE_SIZE >= sizeof(__cpu_simple_lock_t));

/*
 * Print debugging information about the kernel lock.
 */
static void
_kernel_lock_dump(const volatile void *junk, lockop_printer_t pr)
{
	struct cpu_info *ci = curcpu();

	(void)junk;

	pr("curcpu holds : %18d wanted by: %#018lx\n",
	    ci->ci_biglock_count, (long)ci->ci_biglock_wanted);
}

/*
 * Acquire 'nlocks' holds on the kernel lock.
 *
 * Although it may not look it, this is one of the most central, intricate
 * routines in the kernel, and tons of code elsewhere depends on its exact
 * behaviour.  If you change something in here, expect it to bite you in the
 * rear.
 */
void
_kernel_lock(int nlocks)
{
	struct cpu_info *ci;
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_FLAG(lsflag);
	struct lwp *owant;
#ifdef LOCKDEBUG
	static struct cpu_info *kernel_lock_holder;
	u_int spins = 0;
	u_int starttime = getticks();
#endif
	int s;
	struct lwp *l = curlwp;

	_KERNEL_LOCK_ASSERT(nlocks > 0);

	s = splvm();
	ci = curcpu();
	if (ci->ci_biglock_count != 0) {
		_KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock));
		ci->ci_biglock_count += nlocks;
		l->l_blcnt += nlocks;
		splx(s);
		return;
	}

	_KERNEL_LOCK_ASSERT(l->l_blcnt == 0);
	LOCKDEBUG_WANTLOCK(kernel_lock_dodebug, kernel_lock, RETURN_ADDRESS,
	    0);

	if (__predict_true(__cpu_simple_lock_try(kernel_lock))) {
#ifdef LOCKDEBUG
		kernel_lock_holder = curcpu();
#endif
		ci->ci_biglock_count = nlocks;
		l->l_blcnt = nlocks;
		LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL,
		    RETURN_ADDRESS, 0);
		splx(s);
		return;
	}

	/*
	 * To remove the ordering constraint between adaptive mutexes
	 * and kernel_lock we must make it appear as if this thread is
	 * blocking.  For non-interlocked mutex release, a store fence
	 * is required to ensure that the result of any mutex_exit()
	 * by the current LWP becomes visible on the bus before the set
	 * of ci->ci_biglock_wanted becomes visible.
	 *
	 * This membar_producer matches the membar_consumer in
	 * mutex_vector_enter.
	 *
	 * That way, if l has just released a mutex, mutex_vector_enter
	 * can't see this store ci->ci_biglock_wanted := l until it
	 * will also see the mutex_exit store mtx->mtx_owner := 0 which
	 * clears the has-waiters bit.
	 */
	membar_producer();
	owant = ci->ci_biglock_wanted;
	atomic_store_relaxed(&ci->ci_biglock_wanted, l);
#if defined(DIAGNOSTIC) && !defined(LOCKDEBUG)
	l->l_ld_wanted = __builtin_return_address(0);
#endif
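
	/*
	 * Added note on the LOCKDEBUG diagnostic in the loop below: once
	 * init has started, a spin that exceeds the spinout threshold and
	 * has lasted more than ten seconds of ticks sends an IPI to the
	 * CPU last seen taking the lock so that it reports itself (and,
	 * with DDB, prints its stack), after which the spinning CPU
	 * aborts via _KERNEL_LOCK_ABORT("spinout").
	 */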

	/*
	 * Spin until we acquire the lock.  Once we have it, record the
	 * time spent with lockstat.
	 */
	LOCKSTAT_ENTER(lsflag);
	LOCKSTAT_START_TIMER(lsflag, spintime);

	do {
		splx(s);
		while (__SIMPLELOCK_LOCKED_P(kernel_lock)) {
#ifdef LOCKDEBUG
			if (SPINLOCK_SPINOUT(spins) && start_init_exec &&
			    (getticks() - starttime) > 10*hz) {
				ipi_msg_t msg = {
					.func = kernel_lock_trace_ipi,
				};
				kpreempt_disable();
				ipi_unicast(&msg, kernel_lock_holder);
				ipi_wait(&msg);
				kpreempt_enable();
				_KERNEL_LOCK_ABORT("spinout");
			}
#endif
			SPINLOCK_BACKOFF_HOOK;
			SPINLOCK_SPIN_HOOK;
		}
		s = splvm();
	} while (!__cpu_simple_lock_try(kernel_lock));

	ci->ci_biglock_count = nlocks;
	l->l_blcnt = nlocks;
	LOCKSTAT_STOP_TIMER(lsflag, spintime);
	LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL,
	    RETURN_ADDRESS, 0);
	if (owant == NULL) {
		LOCKSTAT_EVENT_RA(lsflag, kernel_lock,
		    LB_KERNEL_LOCK | LB_SPIN, 1, spintime, RETURN_ADDRESS);
	}
	LOCKSTAT_EXIT(lsflag);
	splx(s);

	/*
	 * Now that we have kernel_lock, reset ci_biglock_wanted.  This
	 * store must be visible on other CPUs before a mutex_exit() on
	 * this CPU can test the has-waiters bit.
	 *
	 * This membar_enter matches the membar_enter in
	 * mutex_vector_enter.  (Yes, not membar_exit -- the legacy
	 * naming is confusing, but store-before-load usually pairs
	 * with store-before-load, in the extremely rare cases where it
	 * is used at all.)
	 *
	 * That way, mutex_vector_enter can't see this store
	 * ci->ci_biglock_wanted := owant until it has set the
	 * has-waiters bit.
	 */
	(void)atomic_swap_ptr(&ci->ci_biglock_wanted, owant);
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_enter();
#endif

#ifdef LOCKDEBUG
	kernel_lock_holder = curcpu();
#endif
}
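
/*
 * Illustrative sketch (not part of this file): most consumers do not call
 * _kernel_lock()/_kernel_unlock() directly but go through the KERNEL_LOCK()
 * and KERNEL_UNLOCK_ONE() macros from <sys/systm.h>, which expand to these
 * functions on kernels where the big lock is in use.  The routine below is
 * hypothetical.
 *
 *	void
 *	frob_ioctl_locked(void)
 *	{
 *
 *		KERNEL_LOCK(1, NULL);
 *		... touch state still covered by the big lock ...
 *		KERNEL_UNLOCK_ONE(NULL);
 *	}
 */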

/*
 * Release 'nlocks' holds on the kernel lock.  If 'nlocks' is zero, release
 * all holds.
 */
void
_kernel_unlock(int nlocks, int *countp)
{
	struct cpu_info *ci;
	u_int olocks;
	int s;
	struct lwp *l = curlwp;

	_KERNEL_LOCK_ASSERT(nlocks < 2);

	olocks = l->l_blcnt;

	if (olocks == 0) {
		_KERNEL_LOCK_ASSERT(nlocks <= 0);
		if (countp != NULL)
			*countp = 0;
		return;
	}

	_KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock));

	if (nlocks == 0)
		nlocks = olocks;
	else if (nlocks == -1) {
		nlocks = 1;
		_KERNEL_LOCK_ASSERT(olocks == 1);
	}
	s = splvm();
	ci = curcpu();
	_KERNEL_LOCK_ASSERT(ci->ci_biglock_count >= l->l_blcnt);
	if (ci->ci_biglock_count == nlocks) {
		LOCKDEBUG_UNLOCKED(kernel_lock_dodebug, kernel_lock,
		    RETURN_ADDRESS, 0);
		ci->ci_biglock_count = 0;
		__cpu_simple_unlock(kernel_lock);
		l->l_blcnt -= nlocks;
		splx(s);
		if (l->l_dopreempt)
			kpreempt(0);
	} else {
		ci->ci_biglock_count -= nlocks;
		l->l_blcnt -= nlocks;
		splx(s);
	}

	if (countp != NULL)
		*countp = olocks;
}

bool
_kernel_locked_p(void)
{
	return __SIMPLELOCK_LOCKED_P(kernel_lock);
}
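
/*
 * Illustrative sketch (not part of this file): KERNEL_LOCKED_P() wraps
 * _kernel_locked_p() for assertions, and the count returned through
 * 'countp' supports the drop-all/reacquire pattern used around code that
 * must not hold the big lock while it sleeps.  The routine below is
 * hypothetical.
 *
 *	void
 *	frob_wait(void)
 *	{
 *		int biglocks;
 *
 *		KASSERT(KERNEL_LOCKED_P());
 *		KERNEL_UNLOCK_ALL(NULL, &biglocks);
 *		... sleep, or call code that must run without the big lock ...
 *		KERNEL_LOCK(biglocks, NULL);
 *	}
 */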