/*	$NetBSD: uvm_pdaemon.c,v 1.134 2023/09/10 15:01:11 ad Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.134 2023/09/10 15:01:11 ad Exp $");

#include "opt_uvmhist.h"
#include "opt_readahead.h"

#define	__RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/buf.h>
#include <sys/module.h>
#include <sys/atomic.h>
#include <sys/kthread.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pgflcache.h>

#ifdef UVMHIST
#ifndef UVMHIST_PDHIST_SIZE
#define UVMHIST_PDHIST_SIZE 100
#endif
static struct kern_history_ent pdhistbuf[UVMHIST_PDHIST_SIZE];
UVMHIST_DEFINE(pdhist) = UVMHIST_INITIALIZER(pdhisthist, pdhistbuf);
#endif

/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define	UVMPD_NUMDIRTYREACTS	16

/*
 * local prototypes
 */

static void	uvmpd_scan(void);
static void	uvmpd_scan_queue(void);
static void	uvmpd_tune(void);
static void	uvmpd_pool_drain_thread(void *);
static void	uvmpd_pool_drain_wakeup(void);

static unsigned int uvm_pagedaemon_waiters;

/* State for the pool drainer thread */
static kmutex_t uvmpd_lock __cacheline_aligned;
static kcondvar_t uvmpd_pool_drain_cv;
static bool uvmpd_pool_drain_run = false;

/*
 * XXX hack to avoid hangs when large processes fork.
 */
u_int uvm_extrapages;

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */
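/*
 * Illustrative caller pattern (hypothetical; not part of this file).
 * A pageable allocation path is expected to drop every lock it holds,
 * call uvm_wait(), and then retry, along these lines:
 *
 *	while ((pg = uvm_pagealloc(uobj, off, NULL, 0)) == NULL) {
 *		rw_exit(uobj->vmobjlock);	-- release all locks first
 *		uvm_wait("examplewt");
 *		rw_enter(uobj->vmobjlock, RW_WRITER);
 *	}
 */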
void
uvm_wait(const char *wmsg)
{
	int timo = 0;

	if (uvm.pagedaemon_lwp == NULL)
		panic("out of memory before the pagedaemon thread exists");

	mutex_spin_enter(&uvmpd_lock);

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */

	if (curlwp == uvm.pagedaemon_lwp && uvmexp.paging == 0) {
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.  but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = hz >> 3;		/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_pagedaemon_waiters++;
	wakeup(&uvm.pagedaemon);		/* wake the daemon! */
	UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvmpd_lock, false, wmsg, timo);
}

/*
 * uvm_kick_pdaemon: perform checks to determine if we need to
 * give the pagedaemon a nudge, and do so if necessary.
 */

void
uvm_kick_pdaemon(void)
{
	int fpages = uvm_availmem(false);

	if (fpages + uvmexp.paging < uvmexp.freemin ||
	    (fpages + uvmexp.paging < uvmexp.freetarg &&
	     uvmpdpol_needsscan_p()) ||
	    uvm_km_va_starved_p()) {
		mutex_spin_enter(&uvmpd_lock);
		wakeup(&uvm.pagedaemon);
		mutex_spin_exit(&uvmpd_lock);
	}
}

/*
 * uvmpd_tune: tune paging parameters
 *
 * => called whenever memory is added (or removed?) to the system
 */

static void
uvmpd_tune(void)
{
	int val;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pdhist);

	/*
	 * try to keep 0.5% of available RAM free, but limit to between
	 * 128k and 1024k per-CPU.  XXX: what are these values good for?
	 */
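	/*
	 * Worked example (hypothetical machine, for illustration only):
	 * with 4 KiB pages, 8 GiB of RAM and 4 CPUs, npages/200 is ~10485,
	 * which the clamps below cut to 256 pages (1024 KiB) per CPU, so
	 * freemin becomes 1024 pages (4 MiB) and freetarg about 1365 pages.
	 */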
	val = uvmexp.npages / 200;
	val = MAX(val, (128*1024) >> PAGE_SHIFT);
	val = MIN(val, (1024*1024) >> PAGE_SHIFT);
	val *= ncpu;

	/* Make sure there's always a user page free. */
	if (val < uvmexp.reserve_kernel + 1)
		val = uvmexp.reserve_kernel + 1;
	uvmexp.freemin = val;

	/* Calculate free target. */
	val = (uvmexp.freemin * 4) / 3;
	if (val <= uvmexp.freemin)
		val = uvmexp.freemin + 1;
	uvmexp.freetarg = val + atomic_swap_uint(&uvm_extrapages, 0);

	uvmexp.wiredmax = uvmexp.npages / 3;
	UVMHIST_LOG(pdhist, "<- done, freemin=%jd, freetarg=%jd, wiredmax=%jd",
	    uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
}

/*
 * uvm_pageout: the main loop for the pagedaemon
 */

void
uvm_pageout(void *arg)
{
	int npages = 0;
	int extrapages = 0;
	int fpages;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pdhist);

	UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);

	mutex_init(&uvmpd_lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&uvmpd_pool_drain_cv, "pooldrain");

	/* Create the pool drainer kernel thread. */
	if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL,
	    uvmpd_pool_drain_thread, NULL, NULL, "pooldrain"))
		panic("fork pooldrain");

	/*
	 * ensure correct priority and set paging parameters...
	 */

	uvm.pagedaemon_lwp = curlwp;
	npages = uvmexp.npages;
	uvmpd_tune();

	/*
	 * main loop
	 */

	for (;;) {
		bool needsscan, needsfree, kmem_va_starved;

		kmem_va_starved = uvm_km_va_starved_p();

		mutex_spin_enter(&uvmpd_lock);
		if ((uvm_pagedaemon_waiters == 0 || uvmexp.paging > 0) &&
		    !kmem_va_starved) {
			UVMHIST_LOG(pdhist,"  <<SLEEPING>>",0,0,0,0);
			UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
			    &uvmpd_lock, false, "pgdaemon", 0);
			uvmexp.pdwoke++;
			UVMHIST_LOG(pdhist,"  <<WOKE UP>>",0,0,0,0);
		} else {
			mutex_spin_exit(&uvmpd_lock);
		}

		/*
		 * now recompute inactive count
		 */

		if (npages != uvmexp.npages || extrapages != uvm_extrapages) {
			npages = uvmexp.npages;
			extrapages = uvm_extrapages;
			uvmpd_tune();
		}

		uvmpdpol_tune();

		/*
		 * Estimate a hint.  Note that bufmem is returned to the
		 * system only when an entire pool page is empty.
		 */
		fpages = uvm_availmem(false);
		UVMHIST_LOG(pdhist,"  free/ftarg=%jd/%jd",
		    fpages, uvmexp.freetarg, 0,0);

		needsfree = fpages + uvmexp.paging < uvmexp.freetarg;
		needsscan = needsfree || uvmpdpol_needsscan_p();

		/*
		 * scan if needed
		 */
		if (needsscan) {
			uvmpd_scan();
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		if (uvm_availmem(false) > uvmexp.reserve_kernel ||
		    uvmexp.paging == 0) {
			mutex_spin_enter(&uvmpd_lock);
			wakeup(&uvmexp.free);
			uvm_pagedaemon_waiters = 0;
			mutex_spin_exit(&uvmpd_lock);
		}

		/*
		 * scan done.  if we don't need free memory, we're done.
		 */

		if (!needsfree && !kmem_va_starved)
			continue;

		/*
		 * kick the pool drainer thread.
		 */

		uvmpd_pool_drain_wakeup();
	}
	/*NOTREACHED*/
}

void
uvm_pageout_start(int npages)
{

	atomic_add_int(&uvmexp.paging, npages);
}

void
uvm_pageout_done(int npages)
{

	KASSERT(atomic_load_relaxed(&uvmexp.paging) >= npages);

	if (npages == 0) {
		return;
	}

	atomic_add_int(&uvmexp.paging, -npages);

	/*
	 * wake up either the pagedaemon or LWPs waiting for it.
	 */

	mutex_spin_enter(&uvmpd_lock);
	if (uvm_availmem(false) <= uvmexp.reserve_kernel) {
		wakeup(&uvm.pagedaemon);
	} else if (uvm_pagedaemon_waiters != 0) {
		wakeup(&uvmexp.free);
		uvm_pagedaemon_waiters = 0;
	}
	mutex_spin_exit(&uvmpd_lock);
}

static krwlock_t *
uvmpd_page_owner_lock(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;
	struct vm_anon *anon = pg->uanon;
	krwlock_t *slock;

	KASSERT(mutex_owned(&pg->interlock));

#ifdef DEBUG
	if (uobj == (void *)0xdeadbeef || anon == (void *)0xdeadbeef) {
		return NULL;
	}
#endif
	if (uobj != NULL) {
		slock = uobj->vmobjlock;
		KASSERTMSG(slock != NULL, "pg %p uobj %p, NULL lock", pg, uobj);
	} else if (anon != NULL) {
		slock = anon->an_lock;
		KASSERTMSG(slock != NULL, "pg %p anon %p, NULL lock", pg, anon);
	} else {
		slock = NULL;
	}
	return slock;
}

/*
 * uvmpd_trylockowner: trylock the page's owner.
 *
 * => called with page interlock held.
 * => resolve orphaned O->A loaned page.
 * => return the locked rwlock on success.  otherwise, return NULL.
 */
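/*
 * Illustrative caller pattern (hypothetical; the real callers live in
 * the pdpolicy code):
 *
 *	mutex_enter(&pg->interlock);
 *	slock = uvmpd_trylockowner(pg);		-- always drops the interlock
 *	if (slock != NULL) {
 *		... work on pg under the owner's lock ...
 *		rw_exit(slock);
 *	}
 */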

krwlock_t *
uvmpd_trylockowner(struct vm_page *pg)
{
	krwlock_t *slock, *heldslock = NULL;

	KASSERT(mutex_owned(&pg->interlock));

	slock = uvmpd_page_owner_lock(pg);
	if (slock == NULL) {
		/* Page may be in state of flux - ignore. */
		mutex_exit(&pg->interlock);
		return NULL;
	}

	if (rw_tryenter(slock, RW_WRITER)) {
		goto success;
	}

	/*
	 * The try-lock didn't work, so now do a blocking lock after
	 * dropping the page interlock.  Prevent the owner lock from
	 * being freed by taking a hold on it first.
	 */

	rw_obj_hold(slock);
	mutex_exit(&pg->interlock);
	rw_enter(slock, RW_WRITER);
	heldslock = slock;

	/*
	 * Now we hold some owner lock.  Check if the lock we hold
	 * is still the lock for the owner of the page.
	 * If it is then return it, otherwise release it and return NULL.
	 */

	mutex_enter(&pg->interlock);
	slock = uvmpd_page_owner_lock(pg);
	if (heldslock != slock) {
		rw_exit(heldslock);
		slock = NULL;
	} else {
success:
		/*
		 * Set PG_ANON if it isn't set already.
		 */
		if (pg->uobject == NULL && (pg->flags & PG_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->flags |= PG_ANON;
			/* anon now owns it */
		}
	}
	mutex_exit(&pg->interlock);
	if (heldslock != NULL) {
		rw_obj_free(heldslock);
	}
	return slock;
}

#if defined(VMSWAP)
struct swapcluster {
	int swc_slot;
	int swc_nallocated;
	int swc_nused;
	struct vm_page *swc_pages[howmany(MAXPHYS, MIN_PAGE_SIZE)];
};

static void
swapcluster_init(struct swapcluster *swc)
{

	swc->swc_slot = 0;
	swc->swc_nused = 0;
}

static int
swapcluster_allocslots(struct swapcluster *swc)
{
	int slot;
	int npages;

	if (swc->swc_slot != 0) {
		return 0;
	}

	/* Even with strange MAXPHYS, the shift
	   implicitly rounds down to a page. */
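	/*
	 * For illustration (values are hypothetical and machine-dependent):
	 * with MAXPHYS of 64 KiB and 4 KiB pages this asks for a cluster of
	 * 16 slots, i.e. up to 16 pages written out in a single swap I/O.
	 */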
	npages = MAXPHYS >> PAGE_SHIFT;
	slot = uvm_swap_alloc(&npages, true);
	if (slot == 0) {
		return ENOMEM;
	}
	swc->swc_slot = slot;
	swc->swc_nallocated = npages;
	swc->swc_nused = 0;

	return 0;
}

static int
swapcluster_add(struct swapcluster *swc, struct vm_page *pg)
{
	int slot;
	struct uvm_object *uobj;

	KASSERT(swc->swc_slot != 0);
	KASSERT(swc->swc_nused < swc->swc_nallocated);
	KASSERT((pg->flags & PG_SWAPBACKED) != 0);

	slot = swc->swc_slot + swc->swc_nused;
	uobj = pg->uobject;
	if (uobj == NULL) {
		KASSERT(rw_write_held(pg->uanon->an_lock));
		pg->uanon->an_swslot = slot;
	} else {
		int result;

		KASSERT(rw_write_held(uobj->vmobjlock));
		result = uao_set_swslot(uobj, pg->offset >> PAGE_SHIFT, slot);
		if (result == -1) {
			return ENOMEM;
		}
	}
	swc->swc_pages[swc->swc_nused] = pg;
	swc->swc_nused++;

	return 0;
}

static void
swapcluster_flush(struct swapcluster *swc, bool now)
{
	int slot;
	int nused;
	int nallocated;
	int error __diagused;

	if (swc->swc_slot == 0) {
		return;
	}
	KASSERT(swc->swc_nused <= swc->swc_nallocated);

	slot = swc->swc_slot;
	nused = swc->swc_nused;
	nallocated = swc->swc_nallocated;

	/*
	 * if this is the final pageout we could have a few
	 * unused swap blocks.  if so, free them now.
	 */

	if (nused < nallocated) {
		if (!now) {
			return;
		}
		uvm_swap_free(slot + nused, nallocated - nused);
	}

	/*
	 * now start the pageout.
	 */

	if (nused > 0) {
		uvmexp.pdpageouts++;
		uvm_pageout_start(nused);
		error = uvm_swap_put(slot, swc->swc_pages, nused, 0);
		KASSERT(error == 0 || error == ENOMEM);
	}

	/*
	 * zero swslot to indicate that we are
	 * no longer building a swap-backed cluster.
	 */

	swc->swc_slot = 0;
	swc->swc_nused = 0;
}

static int
swapcluster_nused(struct swapcluster *swc)
{

	return swc->swc_nused;
}

/*
 * uvmpd_dropswap: free any swap allocated to this page.
 *
 * => called with owner locked.
 * => return true if a page had an associated slot.
 */
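/*
 * Illustrative use (hypothetical; mirrors the pageout path below): before
 * re-writing a dirty swap-backed page, any stale copy on swap is released
 * so the slot can be re-allocated for the new contents, e.g.
 *
 *	if (uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY)
 *		(void)uvmpd_dropswap(pg);
 */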

bool
uvmpd_dropswap(struct vm_page *pg)
{
	bool result = false;
	struct vm_anon *anon = pg->uanon;

	if ((pg->flags & PG_ANON) && anon->an_swslot) {
		uvm_swap_free(anon->an_swslot, 1);
		anon->an_swslot = 0;
		uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
		result = true;
	} else if (pg->flags & PG_AOBJ) {
		int slot = uao_set_swslot(pg->uobject,
		    pg->offset >> PAGE_SHIFT, 0);
		if (slot) {
			uvm_swap_free(slot, 1);
			uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
			result = true;
		}
	}

	return result;
}

#endif /* defined(VMSWAP) */

/*
 * uvmpd_scan_queue: scan a replace candidate list for pages
 * to clean or free.
 *
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 */

static void
uvmpd_scan_queue(void)
{
	struct vm_page *p;
	struct uvm_object *uobj;
	struct vm_anon *anon;
#if defined(VMSWAP)
	struct swapcluster swc;
#endif /* defined(VMSWAP) */
	int dirtyreacts;
	krwlock_t *slock;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pdhist);

	/*
	 * swslot is non-zero if we are building a swap cluster.  we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */

#if defined(VMSWAP)
	swapcluster_init(&swc);
#endif /* defined(VMSWAP) */

	dirtyreacts = 0;
	uvmpdpol_scaninit();

	while (/* CONSTCOND */ 1) {

		/*
		 * see if we've met the free target.
		 */

		if (uvm_availmem(false) + uvmexp.paging
#if defined(VMSWAP)
		    + swapcluster_nused(&swc)
#endif /* defined(VMSWAP) */
		    >= uvmexp.freetarg << 2 ||
		    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
			UVMHIST_LOG(pdhist,"  met free target: "
			    "exit loop", 0, 0, 0, 0);
			break;
		}

		/*
		 * first we have the pdpolicy select a victim page
		 * and attempt to lock the object that the page
		 * belongs to.  if our attempt fails we skip on to
		 * the next page (no harm done).  it is important to
		 * "try" locking the object as we are locking in the
		 * wrong order (pageq -> object) and we don't want to
		 * deadlock.
		 *
		 * the only time we expect to see an ownerless page
		 * (i.e. a page with no uobject and !PG_ANON) is if an
		 * anon has loaned a page from a uvm_object and the
		 * uvm_object has dropped the ownership.  in that
		 * case, the anon can "take over" the loaned page
		 * and make it its own.
		 */

		p = uvmpdpol_selectvictim(&slock);
		if (p == NULL) {
			break;
		}
		KASSERT(uvmpdpol_pageisqueued_p(p));
		KASSERT(uvm_page_owner_locked_p(p, true));
		KASSERT(p->wire_count == 0);

		/*
		 * we are below target and have a new page to consider.
		 */

		anon = p->uanon;
		uobj = p->uobject;

		if (p->flags & PG_BUSY) {
			rw_exit(slock);
			uvmexp.pdbusy++;
			continue;
		}

		/* does the page belong to an object? */
		if (uobj != NULL) {
			uvmexp.pdobscan++;
		} else {
#if defined(VMSWAP)
			KASSERT(anon != NULL);
			uvmexp.pdanscan++;
#else /* defined(VMSWAP) */
			panic("%s: anon", __func__);
#endif /* defined(VMSWAP) */
		}


		/*
		 * we now have the object locked.
		 * if the page is not swap-backed, call the object's
		 * pager to flush and free the page.
		 */

#if defined(READAHEAD_STATS)
		if ((p->flags & PG_READAHEAD) != 0) {
			p->flags &= ~PG_READAHEAD;
			uvm_ra_miss.ev_count++;
		}
#endif /* defined(READAHEAD_STATS) */

		if ((p->flags & PG_SWAPBACKED) == 0) {
			KASSERT(uobj != NULL);
			(void) (uobj->pgops->pgo_put)(uobj, p->offset,
			    p->offset + PAGE_SIZE, PGO_CLEANIT|PGO_FREE);
			continue;
		}

		/*
		 * the page is swap-backed.  remove all the permissions
		 * from the page so we can sync the modified info
		 * without any race conditions.  if the page is clean
		 * we can free it now and continue.
		 */

		pmap_page_protect(p, VM_PROT_NONE);
		if (uvm_pagegetdirty(p) == UVM_PAGE_STATUS_UNKNOWN) {
			if (pmap_clear_modify(p)) {
				uvm_pagemarkdirty(p, UVM_PAGE_STATUS_DIRTY);
			} else {
				uvm_pagemarkdirty(p, UVM_PAGE_STATUS_CLEAN);
			}
		}
		if (uvm_pagegetdirty(p) != UVM_PAGE_STATUS_DIRTY) {
			int slot;
			int pageidx;

			pageidx = p->offset >> PAGE_SHIFT;
			uvm_pagefree(p);
			atomic_inc_uint(&uvmexp.pdfreed);

			/*
			 * for anons, we need to remove the page
			 * from the anon ourselves.  for aobjs,
			 * pagefree did that for us.
			 */

			if (anon) {
				KASSERT(anon->an_swslot != 0);
				anon->an_page = NULL;
				slot = anon->an_swslot;
			} else {
				slot = uao_find_swslot(uobj, pageidx);
			}
			if (slot > 0) {
				/* this page is now only in swap. */
				KASSERT(uvmexp.swpgonly < uvmexp.swpginuse);
				atomic_inc_uint(&uvmexp.swpgonly);
			}
			rw_exit(slock);
			continue;
		}

#if defined(VMSWAP)
		/*
		 * this page is dirty, skip it if we'll have met our
		 * free target when all the current pageouts complete.
		 */

		if (uvm_availmem(false) + uvmexp.paging >
		    uvmexp.freetarg << 2) {
			rw_exit(slock);
			continue;
		}

		/*
		 * free any swap space allocated to the page since
		 * we'll have to write it again with its new data.
		 */

		uvmpd_dropswap(p);

		/*
		 * start new swap pageout cluster (if necessary).
		 *
		 * if swap is full reactivate this page so that
		 * we eventually cycle all pages through the
		 * inactive queue.
		 */

		if (swapcluster_allocslots(&swc)) {
			dirtyreacts++;
			uvm_pagelock(p);
			uvm_pageactivate(p);
			uvm_pageunlock(p);
			rw_exit(slock);
			continue;
		}

		/*
		 * at this point, we're definitely going to reuse this
		 * page.  mark the page busy and delayed-free.
		 * we should remove the page from the page queues
		 * so we don't ever look at it again.
		 * adjust counters and such.
		 */

		p->flags |= PG_BUSY;
		UVM_PAGE_OWN(p, "scan_queue");
		p->flags |= PG_PAGEOUT;
		uvmexp.pgswapout++;

		uvm_pagelock(p);
		uvm_pagedequeue(p);
		uvm_pageunlock(p);

		/*
		 * add the new page to the cluster.
		 */

		if (swapcluster_add(&swc, p)) {
			p->flags &= ~(PG_BUSY|PG_PAGEOUT);
			UVM_PAGE_OWN(p, NULL);
			dirtyreacts++;
			uvm_pagelock(p);
			uvm_pageactivate(p);
			uvm_pageunlock(p);
			rw_exit(slock);
			continue;
		}
		rw_exit(slock);

		swapcluster_flush(&swc, false);

		/*
		 * the pageout is in progress.  bump counters and set up
		 * for the next loop.
		 */

		atomic_inc_uint(&uvmexp.pdpending);

#else /* defined(VMSWAP) */
		uvm_pagelock(p);
		uvm_pageactivate(p);
		uvm_pageunlock(p);
		rw_exit(slock);
#endif /* defined(VMSWAP) */
	}

	uvmpdpol_scanfini();

#if defined(VMSWAP)
	swapcluster_flush(&swc, true);
#endif /* defined(VMSWAP) */
}

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 */

static void
uvmpd_scan(void)
{
	int swap_shortage, pages_freed, fpages;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pdhist);

	uvmexp.pdrevs++;

	/*
	 * work on meeting our targets.  first we work on our free target
	 * by converting inactive pages into free pages.  then we work on
	 * meeting our inactive target by converting active pages to
	 * inactive ones.
	 */

	UVMHIST_LOG(pdhist, "  starting 'free' loop",0,0,0,0);

	pages_freed = uvmexp.pdfreed;
	uvmpd_scan_queue();
	pages_freed = uvmexp.pdfreed - pages_freed;

	/*
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */

	swap_shortage = 0;
	fpages = uvm_availmem(false);
	if (fpages < uvmexp.freetarg &&
	    uvmexp.swpginuse >= uvmexp.swpgavail &&
	    !uvm_swapisfull() &&
	    pages_freed == 0) {
		swap_shortage = uvmexp.freetarg - fpages;
	}

	uvmpdpol_balancequeue(swap_shortage);

	/*
	 * if still below the minimum target, try unloading kernel
	 * modules.
	 */

	if (uvm_availmem(false) < uvmexp.freemin) {
		module_thread_kick();
	}
}

/*
 * uvm_reclaimable: decide whether to wait for pagedaemon.
 *
 * => return true if it seems worth doing uvm_wait.
 *
 * XXX should be tunable.
 * XXX should consider pools, etc?
 */

bool
uvm_reclaimable(void)
{
	int filepages;
	int active, inactive;

	/*
	 * if swap is not full, no problem.
	 */

	if (!uvm_swapisfull()) {
		return true;
	}

	/*
	 * file-backed pages can be reclaimed even when swap is full.
	 * if we have more than 1/16 of pageable memory or 5MB, try to reclaim.
	 * NB: filepages calculation does not exclude EXECPAGES - intentional.
	 *
	 * XXX assume the worst case, ie. all wired pages are file-backed.
	 *
	 * XXX should consider other reclaimable memory.
	 * XXX ie. pools, traditional buffer cache.
	 */

	cpu_count_sync(false);
	filepages = (int)(cpu_count_get(CPU_COUNT_FILECLEAN) +
	    cpu_count_get(CPU_COUNT_FILEUNKNOWN) +
	    cpu_count_get(CPU_COUNT_FILEDIRTY) - uvmexp.wired);
	uvm_estimatepageable(&active, &inactive);
	if (filepages >= MIN((active + inactive) >> 4,
	    5 * 1024 * 1024 >> PAGE_SHIFT)) {
		return true;
	}

	/*
	 * kill the process, fail allocation, etc..
	 */

	return false;
}

void
uvm_estimatepageable(int *active, int *inactive)
{

	uvmpdpol_estimatepageable(active, inactive);
}


/*
 * Use a separate thread for draining pools.
 * This work can't be done from the main pagedaemon thread because
 * some pool allocators need to take vm_map locks.
 */

static void
uvmpd_pool_drain_thread(void *arg)
{
	struct pool *firstpool, *curpool;
	int bufcnt, lastslept;
	bool cycled;

	firstpool = NULL;
	cycled = true;
	for (;;) {
		/*
		 * sleep until awoken by the pagedaemon.
		 */
		mutex_enter(&uvmpd_lock);
		if (!uvmpd_pool_drain_run) {
			lastslept = getticks();
			cv_wait(&uvmpd_pool_drain_cv, &uvmpd_lock);
			if (getticks() != lastslept) {
				cycled = false;
				firstpool = NULL;
			}
		}
		uvmpd_pool_drain_run = false;
		mutex_exit(&uvmpd_lock);

		/*
		 * rate limit draining, otherwise in desperate circumstances
		 * this can totally saturate the system with xcall activity.
		 */
		if (cycled) {
			kpause("uvmpdlmt", false, 1, NULL);
			cycled = false;
			firstpool = NULL;
		}

		/*
		 * drain and temporarily disable the freelist cache.
		 */
		uvm_pgflcache_pause();

		/*
		 * kill unused metadata buffers.
		 */
		bufcnt = uvmexp.freetarg - uvm_availmem(false);
		if (bufcnt < 0)
			bufcnt = 0;

		mutex_enter(&bufcache_lock);
		buf_drain(bufcnt << PAGE_SHIFT);
		mutex_exit(&bufcache_lock);

		/*
		 * drain a pool, and then re-enable the freelist cache.
		 */
		(void)pool_drain(&curpool);
		KASSERT(curpool != NULL);
		if (firstpool == NULL) {
			firstpool = curpool;
		} else if (firstpool == curpool) {
			cycled = true;
		}
		uvm_pgflcache_resume();
	}
	/*NOTREACHED*/
}

static void
uvmpd_pool_drain_wakeup(void)
{

	mutex_enter(&uvmpd_lock);
	uvmpd_pool_drain_run = true;
	cv_signal(&uvmpd_pool_drain_cv);
	mutex_exit(&uvmpd_lock);
}