1 1.256 thorpej /* $NetBSD: uvm_page.c,v 1.256 2024/03/05 14:33:50 thorpej Exp $ */ 2 1.213 ad 3 1.213 ad /*- 4 1.224 ad * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc. 5 1.213 ad * All rights reserved. 6 1.213 ad * 7 1.213 ad * This code is derived from software contributed to The NetBSD Foundation 8 1.213 ad * by Andrew Doran. 9 1.213 ad * 10 1.213 ad * Redistribution and use in source and binary forms, with or without 11 1.213 ad * modification, are permitted provided that the following conditions 12 1.213 ad * are met: 13 1.213 ad * 1. Redistributions of source code must retain the above copyright 14 1.213 ad * notice, this list of conditions and the following disclaimer. 15 1.213 ad * 2. Redistributions in binary form must reproduce the above copyright 16 1.213 ad * notice, this list of conditions and the following disclaimer in the 17 1.213 ad * documentation and/or other materials provided with the distribution. 18 1.213 ad * 19 1.213 ad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.213 ad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.213 ad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.213 ad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.213 ad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.213 ad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.213 ad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.213 ad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.213 ad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.213 ad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.213 ad * POSSIBILITY OF SUCH DAMAGE. 30 1.213 ad */ 31 1.1 mrg 32 1.62 chs /* 33 1.1 mrg * Copyright (c) 1997 Charles D. Cranor and Washington University. 34 1.62 chs * Copyright (c) 1991, 1993, The Regents of the University of California. 35 1.1 mrg * 36 1.1 mrg * All rights reserved. 37 1.1 mrg * 38 1.1 mrg * This code is derived from software contributed to Berkeley by 39 1.1 mrg * The Mach Operating System project at Carnegie-Mellon University. 40 1.1 mrg * 41 1.1 mrg * Redistribution and use in source and binary forms, with or without 42 1.1 mrg * modification, are permitted provided that the following conditions 43 1.1 mrg * are met: 44 1.1 mrg * 1. Redistributions of source code must retain the above copyright 45 1.1 mrg * notice, this list of conditions and the following disclaimer. 46 1.1 mrg * 2. Redistributions in binary form must reproduce the above copyright 47 1.1 mrg * notice, this list of conditions and the following disclaimer in the 48 1.1 mrg * documentation and/or other materials provided with the distribution. 49 1.170 chuck * 3. Neither the name of the University nor the names of its contributors 50 1.1 mrg * may be used to endorse or promote products derived from this software 51 1.1 mrg * without specific prior written permission. 52 1.1 mrg * 53 1.1 mrg * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 1.1 mrg * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 1.1 mrg * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 1.1 mrg * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 1.1 mrg * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 1.1 mrg * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 1.1 mrg * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 1.1 mrg * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 1.1 mrg * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 1.1 mrg * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 1.1 mrg * SUCH DAMAGE. 64 1.1 mrg * 65 1.1 mrg * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 66 1.4 mrg * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp 67 1.1 mrg * 68 1.1 mrg * 69 1.1 mrg * Copyright (c) 1987, 1990 Carnegie-Mellon University. 70 1.1 mrg * All rights reserved. 71 1.62 chs * 72 1.1 mrg * Permission to use, copy, modify and distribute this software and 73 1.1 mrg * its documentation is hereby granted, provided that both the copyright 74 1.1 mrg * notice and this permission notice appear in all copies of the 75 1.1 mrg * software, derivative works or modified versions, and any portions 76 1.1 mrg * thereof, and that both notices appear in supporting documentation. 77 1.62 chs * 78 1.62 chs * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 79 1.62 chs * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 80 1.1 mrg * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 81 1.62 chs * 82 1.1 mrg * Carnegie Mellon requests users of this software to return to 83 1.1 mrg * 84 1.1 mrg * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU 85 1.1 mrg * School of Computer Science 86 1.1 mrg * Carnegie Mellon University 87 1.1 mrg * Pittsburgh PA 15213-3890 88 1.1 mrg * 89 1.1 mrg * any improvements or extensions that they make and grant Carnegie the 90 1.1 mrg * rights to redistribute these changes. 91 1.1 mrg */ 92 1.1 mrg 93 1.1 mrg /* 94 1.1 mrg * uvm_page.c: page ops. 95 1.1 mrg */ 96 1.71 lukem 97 1.71 lukem #include <sys/cdefs.h> 98 1.256 thorpej __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.256 2024/03/05 14:33:50 thorpej Exp $"); 99 1.6 mrg 100 1.151 thorpej #include "opt_ddb.h" 101 1.187 joerg #include "opt_uvm.h" 102 1.44 chs #include "opt_uvmhist.h" 103 1.113 yamt #include "opt_readahead.h" 104 1.44 chs 105 1.1 mrg #include <sys/param.h> 106 1.1 mrg #include <sys/systm.h> 107 1.35 thorpej #include <sys/sched.h> 108 1.44 chs #include <sys/kernel.h> 109 1.51 chs #include <sys/vnode.h> 110 1.68 chs #include <sys/proc.h> 111 1.202 ad #include <sys/radixtree.h> 112 1.126 ad #include <sys/atomic.h> 113 1.133 ad #include <sys/cpu.h> 114 1.1 mrg 115 1.251 riastrad #include <ddb/db_active.h> 116 1.251 riastrad 117 1.1 mrg #include <uvm/uvm.h> 118 1.151 thorpej #include <uvm/uvm_ddb.h> 119 1.113 yamt #include <uvm/uvm_pdpolicy.h> 120 1.213 ad #include <uvm/uvm_pgflcache.h> 121 1.1 mrg 122 1.1 mrg /* 123 1.140 ad * number of pages per-CPU to reserve for the kernel. 
124 1.140 ad */ 125 1.187 joerg #ifndef UVM_RESERVED_PAGES_PER_CPU 126 1.187 joerg #define UVM_RESERVED_PAGES_PER_CPU 5 127 1.187 joerg #endif 128 1.187 joerg int vm_page_reserve_kernel = UVM_RESERVED_PAGES_PER_CPU; 129 1.140 ad 130 1.140 ad /* 131 1.148 matt * physical memory size; 132 1.148 matt */ 133 1.189 cherry psize_t physmem; 134 1.148 matt 135 1.148 matt /* 136 1.1 mrg * local variables 137 1.1 mrg */ 138 1.1 mrg 139 1.1 mrg /* 140 1.88 thorpej * these variables record the values returned by vm_page_bootstrap, 141 1.88 thorpej * for debugging purposes. The implementation of uvm_pageboot_alloc 142 1.88 thorpej * and pmap_startup here also uses them internally. 143 1.88 thorpej */ 144 1.88 thorpej 145 1.88 thorpej static vaddr_t virtual_space_start; 146 1.88 thorpej static vaddr_t virtual_space_end; 147 1.88 thorpej 148 1.88 thorpej /* 149 1.60 thorpej * we allocate an initial number of page colors in uvm_page_init(), 150 1.60 thorpej * and remember them. We may re-color pages as cache sizes are 151 1.60 thorpej * discovered during the autoconfiguration phase. But we can never 152 1.60 thorpej * free the initial set of buckets, since they are allocated using 153 1.60 thorpej * uvm_pageboot_alloc(). 154 1.60 thorpej */ 155 1.60 thorpej 156 1.179 para static size_t recolored_pages_memsize /* = 0 */; 157 1.213 ad static char *recolored_pages_mem; 158 1.213 ad 159 1.213 ad /* 160 1.213 ad * freelist locks - one per bucket. 161 1.213 ad */ 162 1.213 ad 163 1.213 ad union uvm_freelist_lock uvm_freelist_locks[PGFL_MAX_BUCKETS] 164 1.213 ad __cacheline_aligned; 165 1.213 ad 166 1.213 ad /* 167 1.213 ad * basic NUMA information. 168 1.213 ad */ 169 1.213 ad 170 1.213 ad static struct uvm_page_numa_region { 171 1.213 ad struct uvm_page_numa_region *next; 172 1.213 ad paddr_t start; 173 1.213 ad paddr_t size; 174 1.213 ad u_int numa_id; 175 1.213 ad } *uvm_page_numa_region; 176 1.60 thorpej 177 1.91 yamt #ifdef DEBUG 178 1.223 ad kmutex_t uvm_zerochecklock __cacheline_aligned; 179 1.91 yamt vaddr_t uvm_zerocheckkva; 180 1.91 yamt #endif /* DEBUG */ 181 1.91 yamt 182 1.60 thorpej /* 183 1.190 cherry * These functions are reserved for uvm(9) internal use and are not 184 1.190 cherry * exported in the header file uvm_physseg.h 185 1.190 cherry * 186 1.190 cherry * Thus they are redefined here. 187 1.190 cherry */ 188 1.190 cherry void uvm_physseg_init_seg(uvm_physseg_t, struct vm_page *); 189 1.190 cherry void uvm_physseg_seg_chomp_slab(uvm_physseg_t, struct vm_page *, size_t); 190 1.190 cherry 191 1.190 cherry /* returns a pgs array */ 192 1.190 cherry struct vm_page *uvm_physseg_seg_alloc_from_slab(uvm_physseg_t, size_t); 193 1.190 cherry 194 1.190 cherry /* 195 1.1 mrg * inline functions 196 1.1 mrg */ 197 1.1 mrg 198 1.1 mrg /* 199 1.134 ad * uvm_pageinsert: insert a page in the object. 200 1.1 mrg * 201 1.1 mrg * => caller must lock object 202 1.1 mrg * => call should have already set pg's object and offset pointers 203 1.1 mrg * and bumped the version counter 204 1.1 mrg */ 205 1.1 mrg 206 1.136 yamt static inline void 207 1.203 ad uvm_pageinsert_object(struct uvm_object *uobj, struct vm_page *pg) 208 1.1 mrg { 209 1.1 mrg 210 1.136 yamt KASSERT(uobj == pg->uobject); 211 1.226 ad KASSERT(rw_write_held(uobj->vmobjlock)); 212 1.51 chs KASSERT((pg->flags & PG_TABLED) == 0); 213 1.123 ad 214 1.224 ad if ((pg->flags & PG_STAT) != 0) { 215 1.224 ad /* Cannot use uvm_pagegetdirty(): not yet in radix tree. 
*/ 216 1.224 ad const unsigned int status = pg->flags & (PG_CLEAN | PG_DIRTY); 217 1.224 ad 218 1.236 ad if ((pg->flags & PG_FILE) != 0) { 219 1.224 ad if (uobj->uo_npages == 0) { 220 1.228 ad struct vnode *vp = (struct vnode *)uobj; 221 1.228 ad mutex_enter(vp->v_interlock); 222 1.228 ad KASSERT((vp->v_iflag & VI_PAGES) == 0); 223 1.228 ad vp->v_iflag |= VI_PAGES; 224 1.228 ad vholdl(vp); 225 1.228 ad mutex_exit(vp->v_interlock); 226 1.224 ad } 227 1.224 ad if (UVM_OBJ_IS_VTEXT(uobj)) { 228 1.240 ad cpu_count(CPU_COUNT_EXECPAGES, 1); 229 1.224 ad } 230 1.240 ad cpu_count(CPU_COUNT_FILEUNKNOWN + status, 1); 231 1.94 yamt } else { 232 1.240 ad cpu_count(CPU_COUNT_ANONUNKNOWN + status, 1); 233 1.94 yamt } 234 1.78 chs } 235 1.7 mrg pg->flags |= PG_TABLED; 236 1.67 chs uobj->uo_npages++; 237 1.1 mrg } 238 1.1 mrg 239 1.202 ad static inline int 240 1.136 yamt uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg) 241 1.136 yamt { 242 1.202 ad const uint64_t idx = pg->offset >> PAGE_SHIFT; 243 1.202 ad int error; 244 1.136 yamt 245 1.245 chs KASSERT(rw_write_held(uobj->vmobjlock)); 246 1.245 chs 247 1.202 ad error = radix_tree_insert_node(&uobj->uo_pages, idx, pg); 248 1.202 ad if (error != 0) { 249 1.202 ad return error; 250 1.202 ad } 251 1.224 ad if ((pg->flags & PG_CLEAN) == 0) { 252 1.245 chs uvm_obj_page_set_dirty(pg); 253 1.224 ad } 254 1.224 ad KASSERT(((pg->flags & PG_CLEAN) == 0) == 255 1.245 chs uvm_obj_page_dirty_p(pg)); 256 1.202 ad return 0; 257 1.136 yamt } 258 1.136 yamt 259 1.1 mrg /* 260 1.134 ad * uvm_page_remove: remove page from object. 261 1.1 mrg * 262 1.1 mrg * => caller must lock object 263 1.1 mrg */ 264 1.1 mrg 265 1.109 perry static inline void 266 1.203 ad uvm_pageremove_object(struct uvm_object *uobj, struct vm_page *pg) 267 1.1 mrg { 268 1.1 mrg 269 1.136 yamt KASSERT(uobj == pg->uobject); 270 1.226 ad KASSERT(rw_write_held(uobj->vmobjlock)); 271 1.44 chs KASSERT(pg->flags & PG_TABLED); 272 1.123 ad 273 1.224 ad if ((pg->flags & PG_STAT) != 0) { 274 1.224 ad /* Cannot use uvm_pagegetdirty(): no longer in radix tree. 
*/ 275 1.224 ad const unsigned int status = pg->flags & (PG_CLEAN | PG_DIRTY); 276 1.224 ad 277 1.236 ad if ((pg->flags & PG_FILE) != 0) { 278 1.224 ad if (uobj->uo_npages == 1) { 279 1.228 ad struct vnode *vp = (struct vnode *)uobj; 280 1.228 ad mutex_enter(vp->v_interlock); 281 1.228 ad KASSERT((vp->v_iflag & VI_PAGES) != 0); 282 1.228 ad vp->v_iflag &= ~VI_PAGES; 283 1.228 ad holdrelel(vp); 284 1.228 ad mutex_exit(vp->v_interlock); 285 1.224 ad } 286 1.224 ad if (UVM_OBJ_IS_VTEXT(uobj)) { 287 1.240 ad cpu_count(CPU_COUNT_EXECPAGES, -1); 288 1.224 ad } 289 1.240 ad cpu_count(CPU_COUNT_FILEUNKNOWN + status, -1); 290 1.94 yamt } else { 291 1.240 ad cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 292 1.94 yamt } 293 1.51 chs } 294 1.67 chs uobj->uo_npages--; 295 1.7 mrg pg->flags &= ~PG_TABLED; 296 1.7 mrg pg->uobject = NULL; 297 1.1 mrg } 298 1.1 mrg 299 1.136 yamt static inline void 300 1.136 yamt uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg) 301 1.136 yamt { 302 1.202 ad struct vm_page *opg __unused; 303 1.136 yamt 304 1.245 chs KASSERT(rw_write_held(uobj->vmobjlock)); 305 1.245 chs 306 1.202 ad opg = radix_tree_remove_node(&uobj->uo_pages, pg->offset >> PAGE_SHIFT); 307 1.202 ad KASSERT(pg == opg); 308 1.136 yamt } 309 1.136 yamt 310 1.60 thorpej static void 311 1.213 ad uvm_page_init_bucket(struct pgfreelist *pgfl, struct pgflbucket *pgb, int num) 312 1.60 thorpej { 313 1.213 ad int i; 314 1.60 thorpej 315 1.213 ad pgb->pgb_nfree = 0; 316 1.213 ad for (i = 0; i < uvmexp.ncolors; i++) { 317 1.213 ad LIST_INIT(&pgb->pgb_colors[i]); 318 1.60 thorpej } 319 1.213 ad pgfl->pgfl_buckets[num] = pgb; 320 1.60 thorpej } 321 1.60 thorpej 322 1.1 mrg /* 323 1.1 mrg * uvm_page_init: init the page system. called from uvm_init(). 324 1.62 chs * 325 1.1 mrg * => we return the range of kernel virtual memory in kvm_startp/kvm_endp 326 1.1 mrg */ 327 1.1 mrg 328 1.7 mrg void 329 1.105 thorpej uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp) 330 1.1 mrg { 331 1.256 thorpej static struct uvm_cpu uvm_boot_cpu __cacheline_aligned; 332 1.213 ad psize_t freepages, pagecount, bucketsize, n; 333 1.213 ad struct pgflbucket *pgb; 334 1.63 chs struct vm_page *pagearray; 335 1.213 ad char *bucketarray; 336 1.190 cherry uvm_physseg_t bank; 337 1.213 ad int fl, b; 338 1.7 mrg 339 1.133 ad KASSERT(ncpu <= 1); 340 1.133 ad 341 1.7 mrg /* 342 1.213 ad * init the page queues and free page queue locks, except the 343 1.201 ad * free list; we allocate that later (with the initial vm_page 344 1.60 thorpej * structures). 345 1.7 mrg */ 346 1.51 chs 347 1.256 thorpej curcpu()->ci_data.cpu_uvm = &uvm_boot_cpu; 348 1.113 yamt uvmpdpol_init(); 349 1.213 ad for (b = 0; b < __arraycount(uvm_freelist_locks); b++) { 350 1.213 ad mutex_init(&uvm_freelist_locks[b].lock, MUTEX_DEFAULT, IPL_VM); 351 1.213 ad } 352 1.7 mrg 353 1.7 mrg /* 354 1.51 chs * allocate vm_page structures. 355 1.7 mrg */ 356 1.7 mrg 357 1.7 mrg /* 358 1.7 mrg * sanity check: 359 1.7 mrg * before calling this function the MD code is expected to register 360 1.7 mrg * some free RAM with the uvm_page_physload() function. our job 361 1.7 mrg * now is to allocate vm_page structures for this memory. 362 1.7 mrg */ 363 1.7 mrg 364 1.190 cherry if (uvm_physseg_get_last() == UVM_PHYSSEG_TYPE_INVALID) 365 1.42 mrg panic("uvm_page_bootstrap: no memory pre-allocated"); 366 1.62 chs 367 1.7 mrg /* 368 1.62 chs * first calculate the number of free pages... 369 1.7 mrg * 370 1.7 mrg * note that we use start/end rather than avail_start/avail_end. 
371 1.7 mrg * this allows us to allocate extra vm_page structures in case we 372 1.7 mrg * want to return some memory to the pool after booting. 373 1.7 mrg */ 374 1.62 chs 375 1.7 mrg freepages = 0; 376 1.190 cherry 377 1.190 cherry for (bank = uvm_physseg_get_first(); 378 1.190 cherry uvm_physseg_valid_p(bank) ; 379 1.190 cherry bank = uvm_physseg_get_next(bank)) { 380 1.190 cherry freepages += (uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank)); 381 1.158 uebayasi } 382 1.7 mrg 383 1.7 mrg /* 384 1.60 thorpej * Let MD code initialize the number of colors, or default 385 1.60 thorpej * to 1 color if MD code doesn't care. 386 1.60 thorpej */ 387 1.60 thorpej if (uvmexp.ncolors == 0) 388 1.60 thorpej uvmexp.ncolors = 1; 389 1.60 thorpej uvmexp.colormask = uvmexp.ncolors - 1; 390 1.178 uebayasi KASSERT((uvmexp.colormask & uvmexp.ncolors) == 0); 391 1.60 thorpej 392 1.213 ad /* We always start with only 1 bucket. */ 393 1.213 ad uvm.bucketcount = 1; 394 1.213 ad 395 1.60 thorpej /* 396 1.7 mrg * we now know we have (PAGE_SIZE * freepages) bytes of memory we can 397 1.7 mrg * use. for each page of memory we use we need a vm_page structure. 398 1.7 mrg * thus, the total number of pages we can use is the total size of 399 1.7 mrg * the memory divided by the PAGE_SIZE plus the size of the vm_page 400 1.7 mrg * structure. we add one to freepages as a fudge factor to avoid 401 1.7 mrg * truncation errors (since we can only allocate in terms of whole 402 1.7 mrg * pages). 403 1.7 mrg */ 404 1.15 chs pagecount = ((freepages + 1) << PAGE_SHIFT) / 405 1.7 mrg (PAGE_SIZE + sizeof(struct vm_page)); 406 1.213 ad bucketsize = offsetof(struct pgflbucket, pgb_colors[uvmexp.ncolors]); 407 1.213 ad bucketsize = roundup2(bucketsize, coherency_unit); 408 1.213 ad bucketarray = (void *)uvm_pageboot_alloc( 409 1.213 ad bucketsize * VM_NFREELIST + 410 1.213 ad pagecount * sizeof(struct vm_page)); 411 1.213 ad pagearray = (struct vm_page *) 412 1.213 ad (bucketarray + bucketsize * VM_NFREELIST); 413 1.213 ad 414 1.213 ad for (fl = 0; fl < VM_NFREELIST; fl++) { 415 1.213 ad pgb = (struct pgflbucket *)(bucketarray + bucketsize * fl); 416 1.213 ad uvm_page_init_bucket(&uvm.page_free[fl], pgb, 0); 417 1.60 thorpej } 418 1.13 perry memset(pagearray, 0, pagecount * sizeof(struct vm_page)); 419 1.62 chs 420 1.7 mrg /* 421 1.213 ad * init the freelist cache in the disabled state. 422 1.213 ad */ 423 1.213 ad uvm_pgflcache_init(); 424 1.213 ad 425 1.213 ad /* 426 1.51 chs * init the vm_page structures and put them in the correct place. 427 1.7 mrg */ 428 1.190 cherry /* First init the extent */ 429 1.7 mrg 430 1.190 cherry for (bank = uvm_physseg_get_first(), 431 1.190 cherry uvm_physseg_seg_chomp_slab(bank, pagearray, pagecount); 432 1.190 cherry uvm_physseg_valid_p(bank); 433 1.190 cherry bank = uvm_physseg_get_next(bank)) { 434 1.190 cherry 435 1.190 cherry n = uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank); 436 1.190 cherry uvm_physseg_seg_alloc_from_slab(bank, n); 437 1.190 cherry uvm_physseg_init_seg(bank, pagearray); 438 1.51 chs 439 1.7 mrg /* set up page array pointers */ 440 1.7 mrg pagearray += n; 441 1.7 mrg pagecount -= n; 442 1.7 mrg } 443 1.44 chs 444 1.7 mrg /* 445 1.88 thorpej * pass up the values of virtual_space_start and 446 1.88 thorpej * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper 447 1.88 thorpej * layers of the VM. 
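 *
 * a worked example of the pagecount calculation above (sizes chosen
 * purely for illustration, not taken from any particular port): with
 * 4 KiB pages and a 128 byte struct vm_page, 1 GiB of free RAM gives
 * freepages = 262144, so
 *
 *	pagecount = ((262144 + 1) << 12) / (4096 + 128) ~= 254201
 *
 * i.e. roughly 3% of the managed memory (128 / 4224) is consumed by
 * the vm_page array itself.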
448 1.88 thorpej */ 449 1.88 thorpej 450 1.88 thorpej *kvm_startp = round_page(virtual_space_start); 451 1.88 thorpej *kvm_endp = trunc_page(virtual_space_end); 452 1.88 thorpej 453 1.88 thorpej /* 454 1.51 chs * init various thresholds. 455 1.7 mrg */ 456 1.51 chs 457 1.7 mrg uvmexp.reserve_pagedaemon = 1; 458 1.140 ad uvmexp.reserve_kernel = vm_page_reserve_kernel; 459 1.7 mrg 460 1.7 mrg /* 461 1.213 ad * done! 462 1.34 thorpej */ 463 1.51 chs 464 1.213 ad uvm.page_init_done = true; 465 1.213 ad } 466 1.213 ad 467 1.213 ad /* 468 1.213 ad * uvm_pgfl_lock: lock all freelist buckets 469 1.213 ad */ 470 1.213 ad 471 1.213 ad void 472 1.213 ad uvm_pgfl_lock(void) 473 1.213 ad { 474 1.213 ad int i; 475 1.213 ad 476 1.213 ad for (i = 0; i < __arraycount(uvm_freelist_locks); i++) { 477 1.213 ad mutex_spin_enter(&uvm_freelist_locks[i].lock); 478 1.213 ad } 479 1.213 ad } 480 1.213 ad 481 1.213 ad /* 482 1.213 ad * uvm_pgfl_unlock: unlock all freelist buckets 483 1.213 ad */ 484 1.34 thorpej 485 1.213 ad void 486 1.213 ad uvm_pgfl_unlock(void) 487 1.213 ad { 488 1.213 ad int i; 489 1.1 mrg 490 1.213 ad for (i = 0; i < __arraycount(uvm_freelist_locks); i++) { 491 1.213 ad mutex_spin_exit(&uvm_freelist_locks[i].lock); 492 1.213 ad } 493 1.1 mrg } 494 1.1 mrg 495 1.1 mrg /* 496 1.1 mrg * uvm_setpagesize: set the page size 497 1.62 chs * 498 1.1 mrg * => sets page_shift and page_mask from uvmexp.pagesize. 499 1.62 chs */ 500 1.1 mrg 501 1.7 mrg void 502 1.105 thorpej uvm_setpagesize(void) 503 1.1 mrg { 504 1.85 thorpej 505 1.85 thorpej /* 506 1.85 thorpej * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE 507 1.85 thorpej * to be a constant (indicated by being a non-zero value). 508 1.85 thorpej */ 509 1.85 thorpej if (uvmexp.pagesize == 0) { 510 1.85 thorpej if (PAGE_SIZE == 0) 511 1.85 thorpej panic("uvm_setpagesize: uvmexp.pagesize not set"); 512 1.85 thorpej uvmexp.pagesize = PAGE_SIZE; 513 1.85 thorpej } 514 1.7 mrg uvmexp.pagemask = uvmexp.pagesize - 1; 515 1.7 mrg if ((uvmexp.pagemask & uvmexp.pagesize) != 0) 516 1.168 matt panic("uvm_setpagesize: page size %u (%#x) not a power of two", 517 1.168 matt uvmexp.pagesize, uvmexp.pagesize); 518 1.7 mrg for (uvmexp.pageshift = 0; ; uvmexp.pageshift++) 519 1.7 mrg if ((1 << uvmexp.pageshift) == uvmexp.pagesize) 520 1.7 mrg break; 521 1.1 mrg } 522 1.1 mrg 523 1.1 mrg /* 524 1.1 mrg * uvm_pageboot_alloc: steal memory from physmem for bootstrapping 525 1.1 mrg */ 526 1.1 mrg 527 1.14 eeh vaddr_t 528 1.105 thorpej uvm_pageboot_alloc(vsize_t size) 529 1.1 mrg { 530 1.119 thorpej static bool initialized = false; 531 1.14 eeh vaddr_t addr; 532 1.52 thorpej #if !defined(PMAP_STEAL_MEMORY) 533 1.52 thorpej vaddr_t vaddr; 534 1.14 eeh paddr_t paddr; 535 1.52 thorpej #endif 536 1.1 mrg 537 1.7 mrg /* 538 1.19 thorpej * on first call to this function, initialize ourselves. 
539 1.7 mrg */ 540 1.119 thorpej if (initialized == false) { 541 1.88 thorpej pmap_virtual_space(&virtual_space_start, &virtual_space_end); 542 1.1 mrg 543 1.7 mrg /* round it the way we like it */ 544 1.88 thorpej virtual_space_start = round_page(virtual_space_start); 545 1.88 thorpej virtual_space_end = trunc_page(virtual_space_end); 546 1.19 thorpej 547 1.119 thorpej initialized = true; 548 1.7 mrg } 549 1.52 thorpej 550 1.52 thorpej /* round to page size */ 551 1.52 thorpej size = round_page(size); 552 1.195 mrg uvmexp.bootpages += atop(size); 553 1.52 thorpej 554 1.52 thorpej #if defined(PMAP_STEAL_MEMORY) 555 1.52 thorpej 556 1.62 chs /* 557 1.62 chs * defer bootstrap allocation to MD code (it may want to allocate 558 1.52 thorpej * from a direct-mapped segment). pmap_steal_memory should adjust 559 1.88 thorpej * virtual_space_start/virtual_space_end if necessary. 560 1.52 thorpej */ 561 1.52 thorpej 562 1.88 thorpej addr = pmap_steal_memory(size, &virtual_space_start, 563 1.88 thorpej &virtual_space_end); 564 1.52 thorpej 565 1.250 skrll return addr; 566 1.52 thorpej 567 1.52 thorpej #else /* !PMAP_STEAL_MEMORY */ 568 1.1 mrg 569 1.7 mrg /* 570 1.7 mrg * allocate virtual memory for this request 571 1.7 mrg */ 572 1.88 thorpej if (virtual_space_start == virtual_space_end || 573 1.88 thorpej (virtual_space_end - virtual_space_start) < size) 574 1.19 thorpej panic("uvm_pageboot_alloc: out of virtual space"); 575 1.20 thorpej 576 1.88 thorpej addr = virtual_space_start; 577 1.20 thorpej 578 1.20 thorpej #ifdef PMAP_GROWKERNEL 579 1.20 thorpej /* 580 1.20 thorpej * If the kernel pmap can't map the requested space, 581 1.20 thorpej * then allocate more resources for it. 582 1.20 thorpej */ 583 1.20 thorpej if (uvm_maxkaddr < (addr + size)) { 584 1.20 thorpej uvm_maxkaddr = pmap_growkernel(addr + size); 585 1.20 thorpej if (uvm_maxkaddr < (addr + size)) 586 1.20 thorpej panic("uvm_pageboot_alloc: pmap_growkernel() failed"); 587 1.19 thorpej } 588 1.20 thorpej #endif 589 1.1 mrg 590 1.88 thorpej virtual_space_start += size; 591 1.1 mrg 592 1.9 thorpej /* 593 1.7 mrg * allocate and mapin physical pages to back new virtual pages 594 1.7 mrg */ 595 1.1 mrg 596 1.7 mrg for (vaddr = round_page(addr) ; vaddr < addr + size ; 597 1.7 mrg vaddr += PAGE_SIZE) { 598 1.1 mrg 599 1.7 mrg if (!uvm_page_physget(&paddr)) 600 1.7 mrg panic("uvm_pageboot_alloc: out of memory"); 601 1.1 mrg 602 1.23 thorpej /* 603 1.23 thorpej * Note this memory is no longer managed, so using 604 1.23 thorpej * pmap_kenter is safe. 605 1.23 thorpej */ 606 1.152 cegger pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE, 0); 607 1.7 mrg } 608 1.66 chris pmap_update(pmap_kernel()); 609 1.250 skrll return addr; 610 1.1 mrg #endif /* PMAP_STEAL_MEMORY */ 611 1.1 mrg } 612 1.1 mrg 613 1.1 mrg #if !defined(PMAP_STEAL_MEMORY) 614 1.1 mrg /* 615 1.1 mrg * uvm_page_physget: "steal" one page from the vm_physmem structure. 616 1.1 mrg * 617 1.1 mrg * => attempt to allocate it off the end of a segment in which the "avail" 618 1.1 mrg * values match the start/end values. if we can't do that, then we 619 1.1 mrg * will advance both values (making them equal, and removing some 620 1.1 mrg * vm_page structures from the non-avail area). 621 1.1 mrg * => return false if out of memory. 
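 *
 * for context, a hypothetical early-boot consumer of
 * uvm_pageboot_alloc() above (which, in the !PMAP_STEAL_MEMORY case,
 * obtains its backing pages here one at a time); the message buffer
 * use and size are purely illustrative:
 *
 *	vaddr_t va;
 *
 *	va = uvm_pageboot_alloc(round_page(MSGBUFSIZE));
 *	memset((void *)va, 0, MSGBUFSIZE);
 *
 * the returned VA is already mapped (or direct-mapped when the pmap
 * steals memory), so it is usable before uvm_page_init() runs.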
622 1.1 mrg */ 623 1.1 mrg 624 1.28 drochner /* subroutine: try to allocate from memory chunks on the specified freelist */ 625 1.118 thorpej static bool uvm_page_physget_freelist(paddr_t *, int); 626 1.28 drochner 627 1.118 thorpej static bool 628 1.105 thorpej uvm_page_physget_freelist(paddr_t *paddrp, int freelist) 629 1.1 mrg { 630 1.190 cherry uvm_physseg_t lcv; 631 1.1 mrg 632 1.7 mrg /* pass 1: try allocating from a matching end */ 633 1.1 mrg #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 634 1.191 skrll for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv)) 635 1.1 mrg #else 636 1.191 skrll for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv)) 637 1.1 mrg #endif 638 1.7 mrg { 639 1.119 thorpej if (uvm.page_init_done == true) 640 1.42 mrg panic("uvm_page_physget: called _after_ bootstrap"); 641 1.1 mrg 642 1.190 cherry /* Try to match at front or back on unused segment */ 643 1.200 maxv if (uvm_page_physunload(lcv, freelist, paddrp)) 644 1.190 cherry return true; 645 1.191 skrll } 646 1.1 mrg 647 1.7 mrg /* pass2: forget about matching ends, just allocate something */ 648 1.1 mrg #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 649 1.191 skrll for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv)) 650 1.1 mrg #else 651 1.191 skrll for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv)) 652 1.1 mrg #endif 653 1.7 mrg { 654 1.190 cherry /* Try the front regardless. */ 655 1.200 maxv if (uvm_page_physunload_force(lcv, freelist, paddrp)) 656 1.190 cherry return true; 657 1.190 cherry } 658 1.190 cherry return false; 659 1.28 drochner } 660 1.28 drochner 661 1.118 thorpej bool 662 1.105 thorpej uvm_page_physget(paddr_t *paddrp) 663 1.28 drochner { 664 1.28 drochner int i; 665 1.28 drochner 666 1.28 drochner /* try in the order of freelist preference */ 667 1.28 drochner for (i = 0; i < VM_NFREELIST; i++) 668 1.119 thorpej if (uvm_page_physget_freelist(paddrp, i) == true) 669 1.119 thorpej return (true); 670 1.119 thorpej return (false); 671 1.1 mrg } 672 1.1 mrg #endif /* PMAP_STEAL_MEMORY */ 673 1.1 mrg 674 1.163 uebayasi paddr_t 675 1.163 uebayasi uvm_vm_page_to_phys(const struct vm_page *pg) 676 1.163 uebayasi { 677 1.163 uebayasi 678 1.211 ad return pg->phys_addr & ~(PAGE_SIZE - 1); 679 1.163 uebayasi } 680 1.163 uebayasi 681 1.163 uebayasi /* 682 1.213 ad * uvm_page_numa_load: load NUMA range description. 683 1.213 ad */ 684 1.213 ad void 685 1.213 ad uvm_page_numa_load(paddr_t start, paddr_t size, u_int numa_id) 686 1.213 ad { 687 1.213 ad struct uvm_page_numa_region *d; 688 1.213 ad 689 1.213 ad KASSERT(numa_id < PGFL_MAX_BUCKETS); 690 1.213 ad 691 1.213 ad d = kmem_alloc(sizeof(*d), KM_SLEEP); 692 1.213 ad d->start = start; 693 1.213 ad d->size = size; 694 1.213 ad d->numa_id = numa_id; 695 1.213 ad d->next = uvm_page_numa_region; 696 1.213 ad uvm_page_numa_region = d; 697 1.213 ad } 698 1.213 ad 699 1.213 ad /* 700 1.213 ad * uvm_page_numa_lookup: lookup NUMA node for the given page. 
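 *
 * a sketch of how machine-dependent boot code might use
 * uvm_page_numa_load() above; the physical ranges and node IDs are
 * invented for illustration:
 *
 *	uvm_page_numa_load(0x000000000, 0x100000000, 0);
 *	uvm_page_numa_load(0x100000000, 0x100000000, 1);
 *
 * each call records one contiguous physical range and the node that
 * owns it; uvm_page_redim() later consults these records (via
 * uvm_page_numa_lookup() below) when it sorts pages into buckets.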
701 1.213 ad */ 702 1.213 ad static u_int 703 1.213 ad uvm_page_numa_lookup(struct vm_page *pg) 704 1.213 ad { 705 1.213 ad struct uvm_page_numa_region *d; 706 1.213 ad static bool warned; 707 1.213 ad paddr_t pa; 708 1.213 ad 709 1.213 ad KASSERT(uvm_page_numa_region != NULL); 710 1.213 ad 711 1.213 ad pa = VM_PAGE_TO_PHYS(pg); 712 1.213 ad for (d = uvm_page_numa_region; d != NULL; d = d->next) { 713 1.213 ad if (pa >= d->start && pa < d->start + d->size) { 714 1.213 ad return d->numa_id; 715 1.213 ad } 716 1.213 ad } 717 1.213 ad 718 1.213 ad if (!warned) { 719 1.215 martin printf("uvm_page_numa_lookup: failed, first pg=%p pa=%#" 720 1.215 martin PRIxPADDR "\n", pg, VM_PAGE_TO_PHYS(pg)); 721 1.213 ad warned = true; 722 1.213 ad } 723 1.213 ad 724 1.213 ad return 0; 725 1.213 ad } 726 1.213 ad 727 1.213 ad /* 728 1.213 ad * uvm_page_redim: adjust freelist dimensions if they have changed. 729 1.60 thorpej */ 730 1.60 thorpej 731 1.213 ad static void 732 1.213 ad uvm_page_redim(int newncolors, int newnbuckets) 733 1.60 thorpej { 734 1.213 ad struct pgfreelist npgfl; 735 1.213 ad struct pgflbucket *opgb, *npgb; 736 1.213 ad struct pgflist *ohead, *nhead; 737 1.230 skrll struct vm_page *pg; 738 1.213 ad size_t bucketsize, bucketmemsize, oldbucketmemsize; 739 1.213 ad int fl, ob, oc, nb, nc, obuckets, ocolors; 740 1.213 ad char *bucketarray, *oldbucketmem, *bucketmem; 741 1.60 thorpej 742 1.178 uebayasi KASSERT(((newncolors - 1) & newncolors) == 0); 743 1.178 uebayasi 744 1.213 ad /* Anything to do? */ 745 1.213 ad if (newncolors <= uvmexp.ncolors && 746 1.213 ad newnbuckets == uvm.bucketcount) { 747 1.60 thorpej return; 748 1.213 ad } 749 1.119 thorpej if (uvm.page_init_done == false) { 750 1.77 wrstuden uvmexp.ncolors = newncolors; 751 1.77 wrstuden return; 752 1.77 wrstuden } 753 1.60 thorpej 754 1.213 ad bucketsize = offsetof(struct pgflbucket, pgb_colors[newncolors]); 755 1.213 ad bucketsize = roundup2(bucketsize, coherency_unit); 756 1.213 ad bucketmemsize = bucketsize * newnbuckets * VM_NFREELIST + 757 1.213 ad coherency_unit - 1; 758 1.213 ad bucketmem = kmem_zalloc(bucketmemsize, KM_SLEEP); 759 1.213 ad bucketarray = (char *)roundup2((uintptr_t)bucketmem, coherency_unit); 760 1.213 ad 761 1.213 ad ocolors = uvmexp.ncolors; 762 1.213 ad obuckets = uvm.bucketcount; 763 1.60 thorpej 764 1.255 andvar /* Freelist cache mustn't be enabled. */ 765 1.213 ad uvm_pgflcache_pause(); 766 1.60 thorpej 767 1.60 thorpej /* Make sure we should still do this. */ 768 1.213 ad uvm_pgfl_lock(); 769 1.213 ad if (newncolors <= uvmexp.ncolors && 770 1.213 ad newnbuckets == uvm.bucketcount) { 771 1.213 ad uvm_pgfl_unlock(); 772 1.216 ad uvm_pgflcache_resume(); 773 1.213 ad kmem_free(bucketmem, bucketmemsize); 774 1.60 thorpej return; 775 1.60 thorpej } 776 1.60 thorpej 777 1.60 thorpej uvmexp.ncolors = newncolors; 778 1.60 thorpej uvmexp.colormask = uvmexp.ncolors - 1; 779 1.213 ad uvm.bucketcount = newnbuckets; 780 1.60 thorpej 781 1.213 ad for (fl = 0; fl < VM_NFREELIST; fl++) { 782 1.213 ad /* Init new buckets in new freelist. */ 783 1.213 ad memset(&npgfl, 0, sizeof(npgfl)); 784 1.213 ad for (nb = 0; nb < newnbuckets; nb++) { 785 1.213 ad npgb = (struct pgflbucket *)bucketarray; 786 1.213 ad uvm_page_init_bucket(&npgfl, npgb, nb); 787 1.213 ad bucketarray += bucketsize; 788 1.213 ad } 789 1.213 ad /* Now transfer pages from the old freelist. 
*/ 790 1.213 ad for (nb = ob = 0; ob < obuckets; ob++) { 791 1.213 ad opgb = uvm.page_free[fl].pgfl_buckets[ob]; 792 1.213 ad for (oc = 0; oc < ocolors; oc++) { 793 1.213 ad ohead = &opgb->pgb_colors[oc]; 794 1.213 ad while ((pg = LIST_FIRST(ohead)) != NULL) { 795 1.213 ad LIST_REMOVE(pg, pageq.list); 796 1.213 ad /* 797 1.213 ad * Here we decide on the NEW color & 798 1.213 ad * bucket for the page. For NUMA 799 1.213 ad * we'll use the info that the 800 1.221 ad * hardware gave us. For non-NUMA 801 1.221 ad * assign take physical page frame 802 1.221 ad * number and cache color into 803 1.221 ad * account. We do this to try and 804 1.221 ad * avoid defeating any memory 805 1.221 ad * interleaving in the hardware. 806 1.213 ad */ 807 1.213 ad KASSERT( 808 1.213 ad uvm_page_get_bucket(pg) == ob); 809 1.213 ad KASSERT(fl == 810 1.213 ad uvm_page_get_freelist(pg)); 811 1.235 ad if (uvm_page_numa_region != NULL) { 812 1.213 ad nb = uvm_page_numa_lookup(pg); 813 1.213 ad } else { 814 1.221 ad nb = atop(VM_PAGE_TO_PHYS(pg)) 815 1.221 ad / uvmexp.ncolors / 8 816 1.221 ad % newnbuckets; 817 1.213 ad } 818 1.213 ad uvm_page_set_bucket(pg, nb); 819 1.213 ad npgb = npgfl.pgfl_buckets[nb]; 820 1.213 ad npgb->pgb_nfree++; 821 1.213 ad nc = VM_PGCOLOR(pg); 822 1.213 ad nhead = &npgb->pgb_colors[nc]; 823 1.213 ad LIST_INSERT_HEAD(nhead, pg, pageq.list); 824 1.60 thorpej } 825 1.60 thorpej } 826 1.60 thorpej } 827 1.213 ad /* Install the new freelist. */ 828 1.213 ad memcpy(&uvm.page_free[fl], &npgfl, sizeof(npgfl)); 829 1.60 thorpej } 830 1.60 thorpej 831 1.213 ad /* Unlock and free the old memory. */ 832 1.179 para oldbucketmemsize = recolored_pages_memsize; 833 1.213 ad oldbucketmem = recolored_pages_mem; 834 1.179 para recolored_pages_memsize = bucketmemsize; 835 1.213 ad recolored_pages_mem = bucketmem; 836 1.216 ad 837 1.213 ad uvm_pgfl_unlock(); 838 1.216 ad uvm_pgflcache_resume(); 839 1.176 matt 840 1.179 para if (oldbucketmemsize) { 841 1.213 ad kmem_free(oldbucketmem, oldbucketmemsize); 842 1.179 para } 843 1.60 thorpej 844 1.177 mrg /* 845 1.177 mrg * this calls uvm_km_alloc() which may want to hold 846 1.213 ad * uvm_freelist_lock. 847 1.177 mrg */ 848 1.177 mrg uvm_pager_realloc_emerg(); 849 1.60 thorpej } 850 1.1 mrg 851 1.1 mrg /* 852 1.213 ad * uvm_page_recolor: Recolor the pages if the new color count is 853 1.213 ad * larger than the old one. 854 1.213 ad */ 855 1.213 ad 856 1.213 ad void 857 1.213 ad uvm_page_recolor(int newncolors) 858 1.213 ad { 859 1.213 ad 860 1.213 ad uvm_page_redim(newncolors, uvm.bucketcount); 861 1.213 ad } 862 1.213 ad 863 1.213 ad /* 864 1.213 ad * uvm_page_rebucket: Determine a bucket structure and redim the free 865 1.213 ad * lists to match. 866 1.213 ad */ 867 1.213 ad 868 1.213 ad void 869 1.213 ad uvm_page_rebucket(void) 870 1.213 ad { 871 1.213 ad u_int min_numa, max_numa, npackage, shift; 872 1.213 ad struct cpu_info *ci, *ci2, *ci3; 873 1.213 ad CPU_INFO_ITERATOR cii; 874 1.213 ad 875 1.213 ad /* 876 1.213 ad * If we have more than one NUMA node, and the maximum NUMA node ID 877 1.213 ad * is less than PGFL_MAX_BUCKETS, then we'll use NUMA distribution 878 1.235 ad * for free pages. 
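 *
 * a worked example of the non-NUMA bucket assignment in
 * uvm_page_redim() above, with parameters assumed purely for
 * illustration: 4 KiB pages, uvmexp.ncolors = 32, newnbuckets = 2.
 * a page at physical address 0x01000000 has frame number 4096, so
 *
 *	nb = 4096 / 32 / 8 % 2 = 0
 *
 * while a page 1 MiB higher lands in bucket 1.  physical memory is
 * thus striped across buckets in chunks of ncolors * 8 pages (1 MiB
 * here), which tries not to defeat any interleaving done by the
 * memory controller.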
879 1.213 ad */ 880 1.213 ad min_numa = (u_int)-1; 881 1.213 ad max_numa = 0; 882 1.213 ad for (CPU_INFO_FOREACH(cii, ci)) { 883 1.213 ad if (ci->ci_numa_id < min_numa) { 884 1.213 ad min_numa = ci->ci_numa_id; 885 1.213 ad } 886 1.213 ad if (ci->ci_numa_id > max_numa) { 887 1.213 ad max_numa = ci->ci_numa_id; 888 1.213 ad } 889 1.213 ad } 890 1.213 ad if (min_numa != max_numa && max_numa < PGFL_MAX_BUCKETS) { 891 1.213 ad aprint_debug("UVM: using NUMA allocation scheme\n"); 892 1.230 skrll for (CPU_INFO_FOREACH(cii, ci)) { 893 1.213 ad ci->ci_data.cpu_uvm->pgflbucket = ci->ci_numa_id; 894 1.213 ad } 895 1.213 ad uvm_page_redim(uvmexp.ncolors, max_numa + 1); 896 1.213 ad return; 897 1.213 ad } 898 1.213 ad 899 1.213 ad /* 900 1.213 ad * Otherwise we'll go with a scheme to maximise L2/L3 cache locality 901 1.213 ad * and minimise lock contention. Count the total number of CPU 902 1.213 ad * packages, and then try to distribute the buckets among CPU 903 1.235 ad * packages evenly. 904 1.213 ad */ 905 1.222 ad npackage = curcpu()->ci_nsibling[CPUREL_PACKAGE1ST]; 906 1.230 skrll 907 1.213 ad /* 908 1.213 ad * Figure out how to arrange the packages & buckets, and the total 909 1.213 ad * number of buckets we need. XXX 2 may not be the best factor. 910 1.213 ad */ 911 1.213 ad for (shift = 0; npackage > PGFL_MAX_BUCKETS; shift++) { 912 1.213 ad npackage >>= 1; 913 1.213 ad } 914 1.213 ad uvm_page_redim(uvmexp.ncolors, npackage); 915 1.213 ad 916 1.213 ad /* 917 1.213 ad * Now tell each CPU which bucket to use. In the outer loop, scroll 918 1.213 ad * through all CPU packages. 919 1.213 ad */ 920 1.213 ad npackage = 0; 921 1.213 ad ci = curcpu(); 922 1.222 ad ci2 = ci->ci_sibling[CPUREL_PACKAGE1ST]; 923 1.213 ad do { 924 1.213 ad /* 925 1.213 ad * In the inner loop, scroll through all CPUs in the package 926 1.213 ad * and assign the same bucket ID. 927 1.213 ad */ 928 1.213 ad ci3 = ci2; 929 1.213 ad do { 930 1.213 ad ci3->ci_data.cpu_uvm->pgflbucket = npackage >> shift; 931 1.213 ad ci3 = ci3->ci_sibling[CPUREL_PACKAGE]; 932 1.213 ad } while (ci3 != ci2); 933 1.213 ad npackage++; 934 1.222 ad ci2 = ci2->ci_sibling[CPUREL_PACKAGE1ST]; 935 1.222 ad } while (ci2 != ci->ci_sibling[CPUREL_PACKAGE1ST]); 936 1.213 ad 937 1.213 ad aprint_debug("UVM: using package allocation scheme, " 938 1.213 ad "%d package(s) per bucket\n", 1 << shift); 939 1.213 ad } 940 1.213 ad 941 1.213 ad /* 942 1.133 ad * uvm_cpu_attach: initialize per-CPU data structures. 943 1.133 ad */ 944 1.133 ad 945 1.133 ad void 946 1.133 ad uvm_cpu_attach(struct cpu_info *ci) 947 1.133 ad { 948 1.133 ad struct uvm_cpu *ucpu; 949 1.133 ad 950 1.213 ad /* Already done in uvm_page_init(). */ 951 1.213 ad if (!CPU_IS_PRIMARY(ci)) { 952 1.213 ad /* Add more reserve pages for this CPU. */ 953 1.213 ad uvmexp.reserve_kernel += vm_page_reserve_kernel; 954 1.213 ad 955 1.213 ad /* Allocate per-CPU data structures. */ 956 1.213 ad ucpu = kmem_zalloc(sizeof(struct uvm_cpu) + coherency_unit - 1, 957 1.213 ad KM_SLEEP); 958 1.213 ad ucpu = (struct uvm_cpu *)roundup2((uintptr_t)ucpu, 959 1.213 ad coherency_unit); 960 1.213 ad ci->ci_data.cpu_uvm = ucpu; 961 1.214 ad } else { 962 1.214 ad ucpu = ci->ci_data.cpu_uvm; 963 1.133 ad } 964 1.181 tls 965 1.220 ad uvmpdpol_init_cpu(ucpu); 966 1.133 ad } 967 1.133 ad 968 1.133 ad /* 969 1.219 ad * uvm_availmem: fetch the total amount of free memory in pages. this can 970 1.219 ad * have a detrimental effect on performance due to false sharing; don't call 971 1.219 ad * unless needed. 
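 *
 * for example, an infrequent, exact check (the caller and threshold
 * are hypothetical):
 *
 *	if (uvm_availmem(false) < npages)
 *		uvm_wait("lowmem");
 *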
972 1.240 ad * 973 1.240 ad * some users can request the amount of free memory so often that it begins 974 1.240 ad * to impact upon performance. if calling frequently and an inexact value 975 1.240 ad * is okay, call with cached = true. 976 1.207 ad */ 977 1.207 ad 978 1.207 ad int 979 1.239 ad uvm_availmem(bool cached) 980 1.207 ad { 981 1.240 ad int64_t fp; 982 1.207 ad 983 1.240 ad cpu_count_sync(cached); 984 1.240 ad if ((fp = cpu_count_get(CPU_COUNT_FREEPAGES)) < 0) { 985 1.240 ad /* 986 1.240 ad * XXXAD could briefly go negative because it's impossible 987 1.240 ad * to get a clean snapshot. address this for other counters 988 1.240 ad * used as running totals before NetBSD 10 although less 989 1.240 ad * important for those. 990 1.240 ad */ 991 1.240 ad fp = 0; 992 1.213 ad } 993 1.240 ad return (int)fp; 994 1.207 ad } 995 1.207 ad 996 1.207 ad /* 997 1.213 ad * uvm_pagealloc_pgb: helper routine that tries to allocate any color from a 998 1.213 ad * specific freelist and specific bucket only. 999 1.213 ad * 1000 1.213 ad * => must be at IPL_VM or higher to protect per-CPU data structures. 1001 1.54 thorpej */ 1002 1.54 thorpej 1003 1.114 thorpej static struct vm_page * 1004 1.213 ad uvm_pagealloc_pgb(struct uvm_cpu *ucpu, int f, int b, int *trycolorp, int flags) 1005 1.54 thorpej { 1006 1.213 ad int c, trycolor, colormask; 1007 1.213 ad struct pgflbucket *pgb; 1008 1.54 thorpej struct vm_page *pg; 1009 1.213 ad kmutex_t *lock; 1010 1.217 ad bool fill; 1011 1.213 ad 1012 1.213 ad /* 1013 1.213 ad * Skip the bucket if empty, no lock needed. There could be many 1014 1.213 ad * empty freelists/buckets. 1015 1.213 ad */ 1016 1.213 ad pgb = uvm.page_free[f].pgfl_buckets[b]; 1017 1.213 ad if (pgb->pgb_nfree == 0) { 1018 1.213 ad return NULL; 1019 1.213 ad } 1020 1.54 thorpej 1021 1.213 ad /* Skip bucket if low on memory. */ 1022 1.213 ad lock = &uvm_freelist_locks[b].lock; 1023 1.213 ad mutex_spin_enter(lock); 1024 1.213 ad if (__predict_false(pgb->pgb_nfree <= uvmexp.reserve_kernel)) { 1025 1.213 ad if ((flags & UVM_PGA_USERESERVE) == 0 || 1026 1.213 ad (pgb->pgb_nfree <= uvmexp.reserve_pagedaemon && 1027 1.213 ad curlwp != uvm.pagedaemon_lwp)) { 1028 1.213 ad mutex_spin_exit(lock); 1029 1.213 ad return NULL; 1030 1.213 ad } 1031 1.217 ad fill = false; 1032 1.217 ad } else { 1033 1.217 ad fill = true; 1034 1.213 ad } 1035 1.130 ad 1036 1.213 ad /* Try all page colors as needed. */ 1037 1.213 ad c = trycolor = *trycolorp; 1038 1.213 ad colormask = uvmexp.colormask; 1039 1.58 enami do { 1040 1.213 ad pg = LIST_FIRST(&pgb->pgb_colors[c]); 1041 1.213 ad if (__predict_true(pg != NULL)) { 1042 1.213 ad /* 1043 1.213 ad * Got a free page! PG_FREE must be cleared under 1044 1.213 ad * lock because of uvm_pglistalloc(). 1045 1.213 ad */ 1046 1.213 ad LIST_REMOVE(pg, pageq.list); 1047 1.242 ad KASSERT(pg->flags == PG_FREE); 1048 1.242 ad pg->flags = PG_BUSY | PG_CLEAN | PG_FAKE; 1049 1.213 ad pgb->pgb_nfree--; 1050 1.249 chs CPU_COUNT(CPU_COUNT_FREEPAGES, -1); 1051 1.230 skrll 1052 1.213 ad /* 1053 1.213 ad * While we have the bucket locked and our data 1054 1.213 ad * structures fresh in L1 cache, we have an ideal 1055 1.213 ad * opportunity to grab some pages for the freelist 1056 1.213 ad * cache without causing extra contention. Only do 1057 1.213 ad * so if we found pages in this CPU's preferred 1058 1.213 ad * bucket. 
1059 1.213 ad */ 1060 1.217 ad if (__predict_true(b == ucpu->pgflbucket && fill)) { 1061 1.213 ad uvm_pgflcache_fill(ucpu, f, b, c); 1062 1.213 ad } 1063 1.213 ad mutex_spin_exit(lock); 1064 1.213 ad KASSERT(uvm_page_get_bucket(pg) == b); 1065 1.213 ad CPU_COUNT(c == trycolor ? 1066 1.213 ad CPU_COUNT_COLORHIT : CPU_COUNT_COLORMISS, 1); 1067 1.213 ad CPU_COUNT(CPU_COUNT_CPUMISS, 1); 1068 1.213 ad *trycolorp = c; 1069 1.213 ad return pg; 1070 1.133 ad } 1071 1.213 ad c = (c + 1) & colormask; 1072 1.213 ad } while (c != trycolor); 1073 1.213 ad mutex_spin_exit(lock); 1074 1.213 ad 1075 1.213 ad return NULL; 1076 1.213 ad } 1077 1.213 ad 1078 1.213 ad /* 1079 1.213 ad * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat that allocates 1080 1.213 ad * any color from any bucket, in a specific freelist. 1081 1.213 ad * 1082 1.213 ad * => must be at IPL_VM or higher to protect per-CPU data structures. 1083 1.213 ad */ 1084 1.54 thorpej 1085 1.213 ad static struct vm_page * 1086 1.213 ad uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int f, int *trycolorp, int flags) 1087 1.213 ad { 1088 1.213 ad int b, trybucket, bucketcount; 1089 1.213 ad struct vm_page *pg; 1090 1.54 thorpej 1091 1.213 ad /* Try for the exact thing in the per-CPU cache. */ 1092 1.213 ad if ((pg = uvm_pgflcache_alloc(ucpu, f, *trycolorp)) != NULL) { 1093 1.213 ad CPU_COUNT(CPU_COUNT_CPUHIT, 1); 1094 1.213 ad CPU_COUNT(CPU_COUNT_COLORHIT, 1); 1095 1.213 ad return pg; 1096 1.54 thorpej } 1097 1.54 thorpej 1098 1.213 ad /* Walk through all buckets, trying our preferred bucket first. */ 1099 1.213 ad trybucket = ucpu->pgflbucket; 1100 1.213 ad b = trybucket; 1101 1.213 ad bucketcount = uvm.bucketcount; 1102 1.213 ad do { 1103 1.213 ad pg = uvm_pagealloc_pgb(ucpu, f, b, trycolorp, flags); 1104 1.213 ad if (pg != NULL) { 1105 1.213 ad return pg; 1106 1.213 ad } 1107 1.213 ad b = (b + 1 == bucketcount ? 0 : b + 1); 1108 1.213 ad } while (b != trybucket); 1109 1.213 ad 1110 1.213 ad return NULL; 1111 1.54 thorpej } 1112 1.54 thorpej 1113 1.54 thorpej /* 1114 1.12 thorpej * uvm_pagealloc_strat: allocate vm_page from a particular free list. 1115 1.1 mrg * 1116 1.1 mrg * => return null if no pages free 1117 1.1 mrg * => wake up pagedaemon if number of free pages drops below low water mark 1118 1.133 ad * => if obj != NULL, obj must be locked (to put in obj's tree) 1119 1.1 mrg * => if anon != NULL, anon must be locked (to put in anon) 1120 1.1 mrg * => only one of obj or anon can be non-null 1121 1.1 mrg * => caller must activate/deactivate page if it is not wired. 1122 1.12 thorpej * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL. 1123 1.34 thorpej * => policy decision: it is more important to pull a page off of the 1124 1.242 ad * appropriate priority free list than it is to get a page from the 1125 1.242 ad * correct bucket or color bin. This is because we live with the 1126 1.34 thorpej * consequences of a bad free list decision for the entire 1127 1.34 thorpej * lifetime of the page, e.g. if the page comes from memory that 1128 1.34 thorpej * is slower to access. 
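 *
 * most callers go through the uvm_pagealloc() wrapper, which selects
 * UVM_PGA_STRAT_NORMAL.  a sketch of the usual allocate-or-wait loop
 * (the object, offset and wmesg are illustrative only):
 *
 *	rw_enter(uobj->vmobjlock, RW_WRITER);
 *	while ((pg = uvm_pagealloc(uobj, off, NULL, UVM_PGA_ZERO)) == NULL) {
 *		rw_exit(uobj->vmobjlock);
 *		uvm_wait("pgalloc");
 *		rw_enter(uobj->vmobjlock, RW_WRITER);
 *	}
 *
 * the page comes back PG_BUSY; the caller unbusies it once it has
 * finished initialising it.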
1129 1.1 mrg */ 1130 1.1 mrg 1131 1.7 mrg struct vm_page * 1132 1.105 thorpej uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon, 1133 1.105 thorpej int flags, int strat, int free_list) 1134 1.1 mrg { 1135 1.242 ad int color, lcv, error, s; 1136 1.133 ad struct uvm_cpu *ucpu; 1137 1.7 mrg struct vm_page *pg; 1138 1.141 ad lwp_t *l; 1139 1.1 mrg 1140 1.44 chs KASSERT(obj == NULL || anon == NULL); 1141 1.169 matt KASSERT(anon == NULL || (flags & UVM_FLAG_COLORMATCH) || off == 0); 1142 1.44 chs KASSERT(off == trunc_page(off)); 1143 1.226 ad KASSERT(obj == NULL || rw_write_held(obj->vmobjlock)); 1144 1.175 rmind KASSERT(anon == NULL || anon->an_lock == NULL || 1145 1.226 ad rw_write_held(anon->an_lock)); 1146 1.48 thorpej 1147 1.7 mrg /* 1148 1.54 thorpej * This implements a global round-robin page coloring 1149 1.54 thorpej * algorithm. 1150 1.54 thorpej */ 1151 1.67 chs 1152 1.213 ad s = splvm(); 1153 1.133 ad ucpu = curcpu()->ci_data.cpu_uvm; 1154 1.169 matt if (flags & UVM_FLAG_COLORMATCH) { 1155 1.169 matt color = atop(off) & uvmexp.colormask; 1156 1.169 matt } else { 1157 1.213 ad color = ucpu->pgflcolor; 1158 1.169 matt } 1159 1.54 thorpej 1160 1.54 thorpej /* 1161 1.7 mrg * fail if any of these conditions is true: 1162 1.7 mrg * [1] there really are no free pages, or 1163 1.7 mrg * [2] only kernel "reserved" pages remain and 1164 1.141 ad * reserved pages have not been requested. 1165 1.7 mrg * [3] only pagedaemon "reserved" pages remain and 1166 1.7 mrg * the requestor isn't the pagedaemon. 1167 1.141 ad * we make kernel reserve pages available if called by a 1168 1.235 ad * kernel thread. 1169 1.7 mrg */ 1170 1.141 ad l = curlwp; 1171 1.235 ad if (__predict_true(l != NULL) && (l->l_flag & LW_SYSTEM) != 0) { 1172 1.141 ad flags |= UVM_PGA_USERESERVE; 1173 1.141 ad } 1174 1.34 thorpej 1175 1.12 thorpej again: 1176 1.12 thorpej switch (strat) { 1177 1.12 thorpej case UVM_PGA_STRAT_NORMAL: 1178 1.213 ad /* Check freelists: descending priority (ascending id) order. */ 1179 1.12 thorpej for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 1180 1.213 ad pg = uvm_pagealloc_pgfl(ucpu, lcv, &color, flags); 1181 1.213 ad if (pg != NULL) { 1182 1.12 thorpej goto gotit; 1183 1.213 ad } 1184 1.12 thorpej } 1185 1.12 thorpej 1186 1.213 ad /* No pages free! Have pagedaemon free some memory. */ 1187 1.213 ad splx(s); 1188 1.213 ad uvm_kick_pdaemon(); 1189 1.213 ad return NULL; 1190 1.12 thorpej 1191 1.12 thorpej case UVM_PGA_STRAT_ONLY: 1192 1.12 thorpej case UVM_PGA_STRAT_FALLBACK: 1193 1.12 thorpej /* Attempt to allocate from the specified free list. */ 1194 1.252 riastrad KASSERT(free_list >= 0); 1195 1.252 riastrad KASSERT(free_list < VM_NFREELIST); 1196 1.213 ad pg = uvm_pagealloc_pgfl(ucpu, free_list, &color, flags); 1197 1.213 ad if (pg != NULL) { 1198 1.12 thorpej goto gotit; 1199 1.213 ad } 1200 1.12 thorpej 1201 1.12 thorpej /* Fall back, if possible. */ 1202 1.12 thorpej if (strat == UVM_PGA_STRAT_FALLBACK) { 1203 1.12 thorpej strat = UVM_PGA_STRAT_NORMAL; 1204 1.12 thorpej goto again; 1205 1.12 thorpej } 1206 1.12 thorpej 1207 1.213 ad /* No pages free! Have pagedaemon free some memory. */ 1208 1.213 ad splx(s); 1209 1.213 ad uvm_kick_pdaemon(); 1210 1.213 ad return NULL; 1211 1.213 ad 1212 1.213 ad case UVM_PGA_STRAT_NUMA: 1213 1.213 ad /* 1214 1.235 ad * NUMA strategy (experimental): allocating from the correct 1215 1.235 ad * bucket is more important than observing freelist 1216 1.235 ad * priority. 
Look only to the current NUMA node; if that 1217 1.235 ad * fails, we need to look to other NUMA nodes, so retry with 1218 1.235 ad * the normal strategy. 1219 1.213 ad */ 1220 1.213 ad for (lcv = 0; lcv < VM_NFREELIST; lcv++) { 1221 1.213 ad pg = uvm_pgflcache_alloc(ucpu, lcv, color); 1222 1.213 ad if (pg != NULL) { 1223 1.213 ad CPU_COUNT(CPU_COUNT_CPUHIT, 1); 1224 1.213 ad CPU_COUNT(CPU_COUNT_COLORHIT, 1); 1225 1.213 ad goto gotit; 1226 1.213 ad } 1227 1.213 ad pg = uvm_pagealloc_pgb(ucpu, lcv, 1228 1.213 ad ucpu->pgflbucket, &color, flags); 1229 1.213 ad if (pg != NULL) { 1230 1.213 ad goto gotit; 1231 1.213 ad } 1232 1.213 ad } 1233 1.213 ad strat = UVM_PGA_STRAT_NORMAL; 1234 1.213 ad goto again; 1235 1.12 thorpej 1236 1.12 thorpej default: 1237 1.12 thorpej panic("uvm_pagealloc_strat: bad strat %d", strat); 1238 1.12 thorpej /* NOTREACHED */ 1239 1.7 mrg } 1240 1.7 mrg 1241 1.12 thorpej gotit: 1242 1.54 thorpej /* 1243 1.54 thorpej * We now know which color we actually allocated from; set 1244 1.54 thorpej * the next color accordingly. 1245 1.54 thorpej */ 1246 1.67 chs 1247 1.213 ad ucpu->pgflcolor = (color + 1) & uvmexp.colormask; 1248 1.34 thorpej 1249 1.34 thorpej /* 1250 1.242 ad * while still at IPL_VM, update allocation statistics. 1251 1.34 thorpej */ 1252 1.67 chs 1253 1.212 ad if (anon) { 1254 1.224 ad CPU_COUNT(CPU_COUNT_ANONCLEAN, 1); 1255 1.212 ad } 1256 1.213 ad splx(s); 1257 1.242 ad KASSERT(pg->flags == (PG_BUSY|PG_CLEAN|PG_FAKE)); 1258 1.7 mrg 1259 1.201 ad /* 1260 1.212 ad * assign the page to the object. as the page was free, we know 1261 1.212 ad * that pg->uobject and pg->uanon are NULL. we only need to take 1262 1.212 ad * the page's interlock if we are changing the values. 1263 1.201 ad */ 1264 1.212 ad if (anon != NULL || obj != NULL) { 1265 1.212 ad mutex_enter(&pg->interlock); 1266 1.212 ad } 1267 1.7 mrg pg->offset = off; 1268 1.7 mrg pg->uobject = obj; 1269 1.7 mrg pg->uanon = anon; 1270 1.226 ad KASSERT(uvm_page_owner_locked_p(pg, true)); 1271 1.7 mrg if (anon) { 1272 1.103 yamt anon->an_page = pg; 1273 1.201 ad pg->flags |= PG_ANON; 1274 1.212 ad mutex_exit(&pg->interlock); 1275 1.201 ad } else if (obj) { 1276 1.224 ad /* 1277 1.224 ad * set PG_FILE|PG_AOBJ before the first uvm_pageinsert. 1278 1.224 ad */ 1279 1.224 ad if (UVM_OBJ_IS_VNODE(obj)) { 1280 1.224 ad pg->flags |= PG_FILE; 1281 1.236 ad } else if (UVM_OBJ_IS_AOBJ(obj)) { 1282 1.224 ad pg->flags |= PG_AOBJ; 1283 1.224 ad } 1284 1.206 ad uvm_pageinsert_object(obj, pg); 1285 1.212 ad mutex_exit(&pg->interlock); 1286 1.206 ad error = uvm_pageinsert_tree(obj, pg); 1287 1.202 ad if (error != 0) { 1288 1.212 ad mutex_enter(&pg->interlock); 1289 1.206 ad uvm_pageremove_object(obj, pg); 1290 1.212 ad mutex_exit(&pg->interlock); 1291 1.202 ad uvm_pagefree(pg); 1292 1.202 ad return NULL; 1293 1.202 ad } 1294 1.7 mrg } 1295 1.143 drochner 1296 1.1 mrg #if defined(UVM_PAGE_TRKOWN) 1297 1.7 mrg pg->owner_tag = NULL; 1298 1.1 mrg #endif 1299 1.7 mrg UVM_PAGE_OWN(pg, "new alloc"); 1300 1.33 thorpej 1301 1.33 thorpej if (flags & UVM_PGA_ZERO) { 1302 1.242 ad /* A zero'd page is not clean. 
*/ 1303 1.224 ad if (obj != NULL || anon != NULL) { 1304 1.224 ad uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); 1305 1.224 ad } 1306 1.242 ad pmap_zero_page(VM_PAGE_TO_PHYS(pg)); 1307 1.33 thorpej } 1308 1.1 mrg 1309 1.7 mrg return(pg); 1310 1.1 mrg } 1311 1.1 mrg 1312 1.1 mrg /* 1313 1.96 yamt * uvm_pagereplace: replace a page with another 1314 1.96 yamt * 1315 1.96 yamt * => object must be locked 1316 1.220 ad * => page interlocks must be held 1317 1.96 yamt */ 1318 1.96 yamt 1319 1.96 yamt void 1320 1.105 thorpej uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg) 1321 1.96 yamt { 1322 1.136 yamt struct uvm_object *uobj = oldpg->uobject; 1323 1.217 ad struct vm_page *pg __diagused; 1324 1.224 ad uint64_t idx; 1325 1.97 junyoung 1326 1.96 yamt KASSERT((oldpg->flags & PG_TABLED) != 0); 1327 1.136 yamt KASSERT(uobj != NULL); 1328 1.96 yamt KASSERT((newpg->flags & PG_TABLED) == 0); 1329 1.96 yamt KASSERT(newpg->uobject == NULL); 1330 1.226 ad KASSERT(rw_write_held(uobj->vmobjlock)); 1331 1.220 ad KASSERT(mutex_owned(&oldpg->interlock)); 1332 1.220 ad KASSERT(mutex_owned(&newpg->interlock)); 1333 1.96 yamt 1334 1.224 ad newpg->uobject = uobj; 1335 1.96 yamt newpg->offset = oldpg->offset; 1336 1.224 ad idx = newpg->offset >> PAGE_SHIFT; 1337 1.224 ad pg = radix_tree_replace_node(&uobj->uo_pages, idx, newpg); 1338 1.217 ad KASSERT(pg == oldpg); 1339 1.224 ad if (((oldpg->flags ^ newpg->flags) & PG_CLEAN) != 0) { 1340 1.224 ad if ((newpg->flags & PG_CLEAN) != 0) { 1341 1.245 chs uvm_obj_page_clear_dirty(newpg); 1342 1.224 ad } else { 1343 1.245 chs uvm_obj_page_set_dirty(newpg); 1344 1.224 ad } 1345 1.224 ad } 1346 1.224 ad /* 1347 1.224 ad * oldpg's PG_STAT is stable. newpg is not reachable by others yet. 1348 1.224 ad */ 1349 1.224 ad newpg->flags |= 1350 1.224 ad (newpg->flags & ~PG_STAT) | (oldpg->flags & PG_STAT); 1351 1.203 ad uvm_pageinsert_object(uobj, newpg); 1352 1.203 ad uvm_pageremove_object(uobj, oldpg); 1353 1.96 yamt } 1354 1.96 yamt 1355 1.96 yamt /* 1356 1.1 mrg * uvm_pagerealloc: reallocate a page from one object to another 1357 1.1 mrg * 1358 1.1 mrg * => both objects must be locked 1359 1.1 mrg */ 1360 1.1 mrg 1361 1.241 ad int 1362 1.105 thorpej uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff) 1363 1.1 mrg { 1364 1.241 ad int error = 0; 1365 1.241 ad 1366 1.7 mrg /* 1367 1.7 mrg * remove it from the old object 1368 1.7 mrg */ 1369 1.7 mrg 1370 1.7 mrg if (pg->uobject) { 1371 1.206 ad uvm_pageremove_tree(pg->uobject, pg); 1372 1.206 ad uvm_pageremove_object(pg->uobject, pg); 1373 1.7 mrg } 1374 1.7 mrg 1375 1.7 mrg /* 1376 1.7 mrg * put it in the new object 1377 1.7 mrg */ 1378 1.7 mrg 1379 1.7 mrg if (newobj) { 1380 1.241 ad mutex_enter(&pg->interlock); 1381 1.241 ad pg->uobject = newobj; 1382 1.241 ad pg->offset = newoff; 1383 1.241 ad if (UVM_OBJ_IS_VNODE(newobj)) { 1384 1.241 ad pg->flags |= PG_FILE; 1385 1.241 ad } else if (UVM_OBJ_IS_AOBJ(newobj)) { 1386 1.241 ad pg->flags |= PG_AOBJ; 1387 1.241 ad } 1388 1.241 ad uvm_pageinsert_object(newobj, pg); 1389 1.241 ad mutex_exit(&pg->interlock); 1390 1.241 ad error = uvm_pageinsert_tree(newobj, pg); 1391 1.241 ad if (error != 0) { 1392 1.241 ad mutex_enter(&pg->interlock); 1393 1.241 ad uvm_pageremove_object(newobj, pg); 1394 1.241 ad mutex_exit(&pg->interlock); 1395 1.241 ad } 1396 1.7 mrg } 1397 1.241 ad 1398 1.241 ad return error; 1399 1.1 mrg } 1400 1.1 mrg 1401 1.1 mrg /* 1402 1.1 mrg * uvm_pagefree: free page 1403 1.1 mrg * 1404 1.133 ad * => erase page's identity (i.e. 
remove from object) 1405 1.1 mrg * => put page on free list 1406 1.1 mrg * => caller must lock owning object (either anon or uvm_object) 1407 1.1 mrg * => assumes all valid mappings of pg are gone 1408 1.1 mrg */ 1409 1.1 mrg 1410 1.44 chs void 1411 1.105 thorpej uvm_pagefree(struct vm_page *pg) 1412 1.1 mrg { 1413 1.213 ad struct pgfreelist *pgfl; 1414 1.213 ad struct pgflbucket *pgb; 1415 1.133 ad struct uvm_cpu *ucpu; 1416 1.213 ad kmutex_t *lock; 1417 1.213 ad int bucket, s; 1418 1.213 ad bool locked; 1419 1.67 chs 1420 1.44 chs #ifdef DEBUG 1421 1.44 chs if (pg->uobject == (void *)0xdeadbeef && 1422 1.44 chs pg->uanon == (void *)0xdeadbeef) { 1423 1.79 provos panic("uvm_pagefree: freeing free page %p", pg); 1424 1.44 chs } 1425 1.91 yamt #endif /* DEBUG */ 1426 1.44 chs 1427 1.123 ad KASSERT((pg->flags & PG_PAGEOUT) == 0); 1428 1.201 ad KASSERT(!(pg->flags & PG_FREE)); 1429 1.226 ad KASSERT(pg->uobject == NULL || rw_write_held(pg->uobject->vmobjlock)); 1430 1.127 ad KASSERT(pg->uobject != NULL || pg->uanon == NULL || 1431 1.226 ad rw_write_held(pg->uanon->an_lock)); 1432 1.123 ad 1433 1.7 mrg /* 1434 1.229 skrll * remove the page from the object's tree before acquiring any page 1435 1.206 ad * interlocks: this can acquire locks to free radixtree nodes. 1436 1.206 ad */ 1437 1.206 ad if (pg->uobject != NULL) { 1438 1.206 ad uvm_pageremove_tree(pg->uobject, pg); 1439 1.206 ad } 1440 1.206 ad 1441 1.206 ad /* 1442 1.67 chs * if the page is loaned, resolve the loan instead of freeing. 1443 1.7 mrg */ 1444 1.7 mrg 1445 1.67 chs if (pg->loan_count) { 1446 1.70 chs KASSERT(pg->wire_count == 0); 1447 1.7 mrg 1448 1.7 mrg /* 1449 1.67 chs * if the page is owned by an anon then we just want to 1450 1.70 chs * drop anon ownership. the kernel will free the page when 1451 1.70 chs * it is done with it. if the page is owned by an object, 1452 1.70 chs * remove it from the object and mark it dirty for the benefit 1453 1.70 chs * of possible anon owners. 1454 1.70 chs * 1455 1.70 chs * regardless of previous ownership, wakeup any waiters, 1456 1.70 chs * unbusy the page, and we're done. 
1457 1.7 mrg */ 1458 1.7 mrg 1459 1.220 ad uvm_pagelock(pg); 1460 1.201 ad locked = true; 1461 1.73 chs if (pg->uobject != NULL) { 1462 1.206 ad uvm_pageremove_object(pg->uobject, pg); 1463 1.224 ad pg->flags &= ~(PG_FILE|PG_AOBJ); 1464 1.73 chs } else if (pg->uanon != NULL) { 1465 1.201 ad if ((pg->flags & PG_ANON) == 0) { 1466 1.73 chs pg->loan_count--; 1467 1.73 chs } else { 1468 1.240 ad const unsigned status = uvm_pagegetdirty(pg); 1469 1.201 ad pg->flags &= ~PG_ANON; 1470 1.240 ad cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 1471 1.73 chs } 1472 1.103 yamt pg->uanon->an_page = NULL; 1473 1.73 chs pg->uanon = NULL; 1474 1.67 chs } 1475 1.231 ad if (pg->pqflags & PQ_WANTED) { 1476 1.70 chs wakeup(pg); 1477 1.70 chs } 1478 1.231 ad pg->pqflags &= ~PQ_WANTED; 1479 1.231 ad pg->flags &= ~(PG_BUSY|PG_RELEASED|PG_PAGER1); 1480 1.70 chs #ifdef UVM_PAGE_TRKOWN 1481 1.70 chs pg->owner_tag = NULL; 1482 1.70 chs #endif 1483 1.224 ad KASSERT((pg->flags & PG_STAT) == 0); 1484 1.73 chs if (pg->loan_count) { 1485 1.115 yamt KASSERT(pg->uobject == NULL); 1486 1.115 yamt if (pg->uanon == NULL) { 1487 1.115 yamt uvm_pagedequeue(pg); 1488 1.115 yamt } 1489 1.220 ad uvm_pageunlock(pg); 1490 1.73 chs return; 1491 1.73 chs } 1492 1.201 ad } else if (pg->uobject != NULL || pg->uanon != NULL || 1493 1.201 ad pg->wire_count != 0) { 1494 1.220 ad uvm_pagelock(pg); 1495 1.201 ad locked = true; 1496 1.201 ad } else { 1497 1.201 ad locked = false; 1498 1.67 chs } 1499 1.62 chs 1500 1.67 chs /* 1501 1.67 chs * remove page from its object or anon. 1502 1.67 chs */ 1503 1.73 chs if (pg->uobject != NULL) { 1504 1.206 ad uvm_pageremove_object(pg->uobject, pg); 1505 1.73 chs } else if (pg->uanon != NULL) { 1506 1.224 ad const unsigned int status = uvm_pagegetdirty(pg); 1507 1.103 yamt pg->uanon->an_page = NULL; 1508 1.201 ad pg->uanon = NULL; 1509 1.240 ad cpu_count(CPU_COUNT_ANONUNKNOWN + status, -1); 1510 1.7 mrg } 1511 1.1 mrg 1512 1.7 mrg /* 1513 1.7 mrg * if the page was wired, unwire it now. 1514 1.7 mrg */ 1515 1.44 chs 1516 1.34 thorpej if (pg->wire_count) { 1517 1.7 mrg pg->wire_count = 0; 1518 1.201 ad atomic_dec_uint(&uvmexp.wired); 1519 1.201 ad } 1520 1.201 ad if (locked) { 1521 1.220 ad /* 1522 1.231 ad * wake anyone waiting on the page. 1523 1.231 ad */ 1524 1.231 ad if ((pg->pqflags & PQ_WANTED) != 0) { 1525 1.231 ad pg->pqflags &= ~PQ_WANTED; 1526 1.231 ad wakeup(pg); 1527 1.231 ad } 1528 1.231 ad 1529 1.231 ad /* 1530 1.220 ad * now remove the page from the queues. 1531 1.220 ad */ 1532 1.220 ad uvm_pagedequeue(pg); 1533 1.220 ad uvm_pageunlock(pg); 1534 1.220 ad } else { 1535 1.220 ad KASSERT(!uvmpdpol_pageisqueued_p(pg)); 1536 1.44 chs } 1537 1.7 mrg 1538 1.7 mrg /* 1539 1.44 chs * and put on free queue 1540 1.7 mrg */ 1541 1.7 mrg 1542 1.3 chs #ifdef DEBUG 1543 1.7 mrg pg->uobject = (void *)0xdeadbeef; 1544 1.7 mrg pg->uanon = (void *)0xdeadbeef; 1545 1.91 yamt #endif /* DEBUG */ 1546 1.91 yamt 1547 1.221 ad /* Try to send the page to the per-CPU cache. */ 1548 1.213 ad s = splvm(); 1549 1.213 ad ucpu = curcpu()->ci_data.cpu_uvm; 1550 1.221 ad bucket = uvm_page_get_bucket(pg); 1551 1.213 ad if (bucket == ucpu->pgflbucket && uvm_pgflcache_free(ucpu, pg)) { 1552 1.213 ad splx(s); 1553 1.213 ad return; 1554 1.133 ad } 1555 1.34 thorpej 1556 1.213 ad /* Didn't work. Never mind, send it to a global bucket. 
*/ 1557 1.213 ad pgfl = &uvm.page_free[uvm_page_get_freelist(pg)]; 1558 1.213 ad pgb = pgfl->pgfl_buckets[bucket]; 1559 1.213 ad lock = &uvm_freelist_locks[bucket].lock; 1560 1.213 ad 1561 1.213 ad mutex_spin_enter(lock); 1562 1.213 ad /* PG_FREE must be set under lock because of uvm_pglistalloc(). */ 1563 1.242 ad pg->flags = PG_FREE; 1564 1.213 ad LIST_INSERT_HEAD(&pgb->pgb_colors[VM_PGCOLOR(pg)], pg, pageq.list); 1565 1.213 ad pgb->pgb_nfree++; 1566 1.249 chs CPU_COUNT(CPU_COUNT_FREEPAGES, 1); 1567 1.213 ad mutex_spin_exit(lock); 1568 1.213 ad splx(s); 1569 1.44 chs } 1570 1.44 chs 1571 1.44 chs /* 1572 1.44 chs * uvm_page_unbusy: unbusy an array of pages. 1573 1.44 chs * 1574 1.44 chs * => pages must either all belong to the same object, or all belong to anons. 1575 1.44 chs * => if pages are object-owned, object must be locked. 1576 1.67 chs * => if pages are anon-owned, anons must be locked. 1577 1.98 yamt * => caller must make sure that anon-owned pages are not PG_RELEASED. 1578 1.44 chs */ 1579 1.44 chs 1580 1.44 chs void 1581 1.105 thorpej uvm_page_unbusy(struct vm_page **pgs, int npgs) 1582 1.44 chs { 1583 1.44 chs struct vm_page *pg; 1584 1.248 chs int i, pageout_done; 1585 1.244 skrll UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist); 1586 1.44 chs 1587 1.248 chs pageout_done = 0; 1588 1.44 chs for (i = 0; i < npgs; i++) { 1589 1.44 chs pg = pgs[i]; 1590 1.82 enami if (pg == NULL || pg == PGO_DONTCARE) { 1591 1.44 chs continue; 1592 1.44 chs } 1593 1.98 yamt 1594 1.226 ad KASSERT(uvm_page_owner_locked_p(pg, true)); 1595 1.98 yamt KASSERT(pg->flags & PG_BUSY); 1596 1.248 chs 1597 1.248 chs if (pg->flags & PG_PAGEOUT) { 1598 1.248 chs pg->flags &= ~PG_PAGEOUT; 1599 1.248 chs pg->flags |= PG_RELEASED; 1600 1.248 chs pageout_done++; 1601 1.248 chs atomic_inc_uint(&uvmexp.pdfreed); 1602 1.248 chs } 1603 1.44 chs if (pg->flags & PG_RELEASED) { 1604 1.194 pgoyette UVMHIST_LOG(ubchist, "releasing pg %#jx", 1605 1.194 pgoyette (uintptr_t)pg, 0, 0, 0); 1606 1.98 yamt KASSERT(pg->uobject != NULL || 1607 1.98 yamt (pg->uanon != NULL && pg->uanon->an_ref > 0)); 1608 1.67 chs pg->flags &= ~PG_RELEASED; 1609 1.67 chs uvm_pagefree(pg); 1610 1.44 chs } else { 1611 1.234 ad UVMHIST_LOG(ubchist, "unbusying pg %#jx", 1612 1.234 ad (uintptr_t)pg, 0, 0, 0); 1613 1.142 yamt KASSERT((pg->flags & PG_FAKE) == 0); 1614 1.234 ad pg->flags &= ~PG_BUSY; 1615 1.231 ad uvm_pagelock(pg); 1616 1.234 ad uvm_pagewakeup(pg); 1617 1.231 ad uvm_pageunlock(pg); 1618 1.234 ad UVM_PAGE_OWN(pg, NULL); 1619 1.44 chs } 1620 1.44 chs } 1621 1.248 chs if (pageout_done != 0) { 1622 1.248 chs uvm_pageout_done(pageout_done); 1623 1.248 chs } 1624 1.1 mrg } 1625 1.1 mrg 1626 1.231 ad /* 1627 1.231 ad * uvm_pagewait: wait for a busy page 1628 1.231 ad * 1629 1.231 ad * => page must be known PG_BUSY 1630 1.231 ad * => object must be read or write locked 1631 1.231 ad * => object will be unlocked on return 1632 1.231 ad */ 1633 1.231 ad 1634 1.231 ad void 1635 1.231 ad uvm_pagewait(struct vm_page *pg, krwlock_t *lock, const char *wmesg) 1636 1.231 ad { 1637 1.231 ad 1638 1.231 ad KASSERT(rw_lock_held(lock)); 1639 1.231 ad KASSERT((pg->flags & PG_BUSY) != 0); 1640 1.231 ad KASSERT(uvm_page_owner_locked_p(pg, false)); 1641 1.231 ad 1642 1.231 ad mutex_enter(&pg->interlock); 1643 1.238 ad pg->pqflags |= PQ_WANTED; 1644 1.234 ad rw_exit(lock); 1645 1.231 ad UVM_UNLOCK_AND_WAIT(pg, &pg->interlock, false, wmesg, 0); 1646 1.231 ad } 1647 1.231 ad 1648 1.231 ad /* 1649 1.234 ad * uvm_pagewakeup: wake anyone waiting on a page 1650 1.231 ad * 
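 * A minimal sketch of the waker side, mirroring what uvm_page_unbusy()
 * does above; it is only an illustration and assumes the caller owns
 * PG_BUSY and holds the owning object's (or anon's) lock:
 *
 *	pg->flags &= ~PG_BUSY;
 *	uvm_pagelock(pg);
 *	uvm_pagewakeup(pg);
 *	uvm_pageunlock(pg);
 *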
1651 1.231 ad  * => page interlock must be held
1652 1.231 ad  */
1653 1.231 ad 
1654 1.231 ad void
1655 1.234 ad uvm_pagewakeup(struct vm_page *pg)
1656 1.231 ad {
1657 1.244 skrll 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist);
1658 1.231 ad 
1659 1.231 ad 	KASSERT(mutex_owned(&pg->interlock));
1660 1.231 ad 
1661 1.234 ad 	UVMHIST_LOG(ubchist, "waking pg %#jx", (uintptr_t)pg, 0, 0, 0);
1662 1.231 ad 
1663 1.231 ad 	if ((pg->pqflags & PQ_WANTED) != 0) {
1664 1.231 ad 		wakeup(pg);
1665 1.231 ad 		pg->pqflags &= ~PQ_WANTED;
1666 1.231 ad 	}
1667 1.231 ad }
1668 1.231 ad 
1669 1.238 ad /*
1670 1.238 ad  * uvm_pagewanted_p: return true if someone is waiting on the page
1671 1.238 ad  *
1672 1.238 ad  * => object must be write locked (lock out all concurrent access)
1673 1.238 ad  */
1674 1.238 ad 
1675 1.238 ad bool
1676 1.238 ad uvm_pagewanted_p(struct vm_page *pg)
1677 1.238 ad {
1678 1.238 ad 
1679 1.238 ad 	KASSERT(uvm_page_owner_locked_p(pg, true));
1680 1.238 ad 
1681 1.238 ad 	return (atomic_load_relaxed(&pg->pqflags) & PQ_WANTED) != 0;
1682 1.238 ad }
1683 1.238 ad 
1684 1.1 mrg #if defined(UVM_PAGE_TRKOWN)
1685 1.1 mrg /*
1686 1.1 mrg  * uvm_page_own: set or release page ownership
1687 1.1 mrg  *
1688 1.1 mrg  * => this is a debugging function that keeps track of who sets PG_BUSY
1689 1.1 mrg  *	and where they do it. it can be used to track down problems
1690 1.1 mrg  *	such as a process setting "PG_BUSY" and never releasing it.
1691 1.1 mrg  * => page's object [if any] must be locked
1692 1.1 mrg  * => if "tag" is NULL then we are releasing page ownership
1693 1.1 mrg  */
1694 1.7 mrg void
1695 1.105 thorpej uvm_page_own(struct vm_page *pg, const char *tag)
1696 1.1 mrg {
1697 1.112 yamt 
1698 1.67 chs 	KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);
1699 1.226 ad 	KASSERT(uvm_page_owner_locked_p(pg, true));
1700 1.112 yamt 
1701 1.7 mrg 	/* gain ownership?
*/ 1702 1.7 mrg if (tag) { 1703 1.112 yamt KASSERT((pg->flags & PG_BUSY) != 0); 1704 1.7 mrg if (pg->owner_tag) { 1705 1.7 mrg printf("uvm_page_own: page %p already owned " 1706 1.237 ad "by proc %d.%d [%s]\n", pg, 1707 1.237 ad pg->owner, pg->lowner, pg->owner_tag); 1708 1.7 mrg panic("uvm_page_own"); 1709 1.7 mrg } 1710 1.184 chs pg->owner = curproc->p_pid; 1711 1.184 chs pg->lowner = curlwp->l_lid; 1712 1.7 mrg pg->owner_tag = tag; 1713 1.7 mrg return; 1714 1.7 mrg } 1715 1.7 mrg 1716 1.7 mrg /* drop ownership */ 1717 1.112 yamt KASSERT((pg->flags & PG_BUSY) == 0); 1718 1.7 mrg if (pg->owner_tag == NULL) { 1719 1.7 mrg printf("uvm_page_own: dropping ownership of an non-owned " 1720 1.7 mrg "page (%p)\n", pg); 1721 1.7 mrg panic("uvm_page_own"); 1722 1.7 mrg } 1723 1.7 mrg pg->owner_tag = NULL; 1724 1.1 mrg } 1725 1.1 mrg #endif 1726 1.34 thorpej 1727 1.34 thorpej /* 1728 1.110 yamt * uvm_pagelookup: look up a page 1729 1.110 yamt * 1730 1.110 yamt * => caller should lock object to keep someone from pulling the page 1731 1.110 yamt * out from under it 1732 1.110 yamt */ 1733 1.110 yamt 1734 1.110 yamt struct vm_page * 1735 1.110 yamt uvm_pagelookup(struct uvm_object *obj, voff_t off) 1736 1.110 yamt { 1737 1.110 yamt struct vm_page *pg; 1738 1.110 yamt 1739 1.251 riastrad KASSERT(db_active || rw_lock_held(obj->vmobjlock)); 1740 1.123 ad 1741 1.202 ad pg = radix_tree_lookup_node(&obj->uo_pages, off >> PAGE_SHIFT); 1742 1.134 ad 1743 1.110 yamt KASSERT(pg == NULL || obj->uo_npages != 0); 1744 1.110 yamt KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 || 1745 1.110 yamt (pg->flags & PG_BUSY) != 0); 1746 1.156 rmind return pg; 1747 1.110 yamt } 1748 1.110 yamt 1749 1.110 yamt /* 1750 1.110 yamt * uvm_pagewire: wire the page, thus removing it from the daemon's grasp 1751 1.110 yamt * 1752 1.201 ad * => caller must lock objects 1753 1.220 ad * => caller must hold pg->interlock 1754 1.110 yamt */ 1755 1.110 yamt 1756 1.110 yamt void 1757 1.110 yamt uvm_pagewire(struct vm_page *pg) 1758 1.110 yamt { 1759 1.201 ad 1760 1.226 ad KASSERT(uvm_page_owner_locked_p(pg, true)); 1761 1.220 ad KASSERT(mutex_owned(&pg->interlock)); 1762 1.113 yamt #if defined(READAHEAD_STATS) 1763 1.201 ad if ((pg->flags & PG_READAHEAD) != 0) { 1764 1.113 yamt uvm_ra_hit.ev_count++; 1765 1.201 ad pg->flags &= ~PG_READAHEAD; 1766 1.113 yamt } 1767 1.113 yamt #endif /* defined(READAHEAD_STATS) */ 1768 1.110 yamt if (pg->wire_count == 0) { 1769 1.110 yamt uvm_pagedequeue(pg); 1770 1.201 ad atomic_inc_uint(&uvmexp.wired); 1771 1.110 yamt } 1772 1.110 yamt pg->wire_count++; 1773 1.197 jdolecek KASSERT(pg->wire_count > 0); /* detect wraparound */ 1774 1.110 yamt } 1775 1.110 yamt 1776 1.110 yamt /* 1777 1.110 yamt * uvm_pageunwire: unwire the page. 1778 1.110 yamt * 1779 1.110 yamt * => activate if wire count goes to zero. 
1780 1.201 ad * => caller must lock objects 1781 1.220 ad * => caller must hold pg->interlock 1782 1.110 yamt */ 1783 1.110 yamt 1784 1.110 yamt void 1785 1.110 yamt uvm_pageunwire(struct vm_page *pg) 1786 1.110 yamt { 1787 1.201 ad 1788 1.226 ad KASSERT(uvm_page_owner_locked_p(pg, true)); 1789 1.199 kre KASSERT(pg->wire_count != 0); 1790 1.201 ad KASSERT(!uvmpdpol_pageisqueued_p(pg)); 1791 1.220 ad KASSERT(mutex_owned(&pg->interlock)); 1792 1.110 yamt pg->wire_count--; 1793 1.110 yamt if (pg->wire_count == 0) { 1794 1.111 yamt uvm_pageactivate(pg); 1795 1.199 kre KASSERT(uvmexp.wired != 0); 1796 1.201 ad atomic_dec_uint(&uvmexp.wired); 1797 1.110 yamt } 1798 1.110 yamt } 1799 1.110 yamt 1800 1.110 yamt /* 1801 1.110 yamt * uvm_pagedeactivate: deactivate page 1802 1.110 yamt * 1803 1.201 ad * => caller must lock objects 1804 1.110 yamt * => caller must check to make sure page is not wired 1805 1.110 yamt * => object that page belongs to must be locked (so we can adjust pg->flags) 1806 1.110 yamt * => caller must clear the reference on the page before calling 1807 1.220 ad * => caller must hold pg->interlock 1808 1.110 yamt */ 1809 1.110 yamt 1810 1.110 yamt void 1811 1.110 yamt uvm_pagedeactivate(struct vm_page *pg) 1812 1.110 yamt { 1813 1.113 yamt 1814 1.232 ad KASSERT(uvm_page_owner_locked_p(pg, false)); 1815 1.220 ad KASSERT(mutex_owned(&pg->interlock)); 1816 1.201 ad if (pg->wire_count == 0) { 1817 1.201 ad KASSERT(uvmpdpol_pageisqueued_p(pg)); 1818 1.201 ad uvmpdpol_pagedeactivate(pg); 1819 1.201 ad } 1820 1.110 yamt } 1821 1.110 yamt 1822 1.110 yamt /* 1823 1.110 yamt * uvm_pageactivate: activate page 1824 1.110 yamt * 1825 1.201 ad * => caller must lock objects 1826 1.220 ad * => caller must hold pg->interlock 1827 1.110 yamt */ 1828 1.110 yamt 1829 1.110 yamt void 1830 1.110 yamt uvm_pageactivate(struct vm_page *pg) 1831 1.110 yamt { 1832 1.113 yamt 1833 1.232 ad KASSERT(uvm_page_owner_locked_p(pg, false)); 1834 1.220 ad KASSERT(mutex_owned(&pg->interlock)); 1835 1.113 yamt #if defined(READAHEAD_STATS) 1836 1.201 ad if ((pg->flags & PG_READAHEAD) != 0) { 1837 1.113 yamt uvm_ra_hit.ev_count++; 1838 1.201 ad pg->flags &= ~PG_READAHEAD; 1839 1.113 yamt } 1840 1.113 yamt #endif /* defined(READAHEAD_STATS) */ 1841 1.201 ad if (pg->wire_count == 0) { 1842 1.201 ad uvmpdpol_pageactivate(pg); 1843 1.110 yamt } 1844 1.110 yamt } 1845 1.110 yamt 1846 1.110 yamt /* 1847 1.110 yamt * uvm_pagedequeue: remove a page from any paging queue 1848 1.230 skrll * 1849 1.201 ad * => caller must lock objects 1850 1.220 ad * => caller must hold pg->interlock 1851 1.110 yamt */ 1852 1.110 yamt void 1853 1.110 yamt uvm_pagedequeue(struct vm_page *pg) 1854 1.110 yamt { 1855 1.113 yamt 1856 1.226 ad KASSERT(uvm_page_owner_locked_p(pg, true)); 1857 1.220 ad KASSERT(mutex_owned(&pg->interlock)); 1858 1.113 yamt if (uvmpdpol_pageisqueued_p(pg)) { 1859 1.201 ad uvmpdpol_pagedequeue(pg); 1860 1.110 yamt } 1861 1.113 yamt } 1862 1.113 yamt 1863 1.113 yamt /* 1864 1.113 yamt * uvm_pageenqueue: add a page to a paging queue without activating. 1865 1.113 yamt * used where a page is not really demanded (yet). eg. 
read-ahead 1866 1.201 ad * 1867 1.201 ad * => caller must lock objects 1868 1.220 ad * => caller must hold pg->interlock 1869 1.113 yamt */ 1870 1.113 yamt void 1871 1.113 yamt uvm_pageenqueue(struct vm_page *pg) 1872 1.113 yamt { 1873 1.113 yamt 1874 1.232 ad KASSERT(uvm_page_owner_locked_p(pg, false)); 1875 1.220 ad KASSERT(mutex_owned(&pg->interlock)); 1876 1.201 ad if (pg->wire_count == 0 && !uvmpdpol_pageisqueued_p(pg)) { 1877 1.201 ad uvmpdpol_pageenqueue(pg); 1878 1.113 yamt } 1879 1.110 yamt } 1880 1.110 yamt 1881 1.110 yamt /* 1882 1.220 ad * uvm_pagelock: acquire page interlock 1883 1.220 ad */ 1884 1.220 ad void 1885 1.220 ad uvm_pagelock(struct vm_page *pg) 1886 1.220 ad { 1887 1.220 ad 1888 1.220 ad mutex_enter(&pg->interlock); 1889 1.220 ad } 1890 1.220 ad 1891 1.220 ad /* 1892 1.220 ad * uvm_pagelock2: acquire two page interlocks 1893 1.220 ad */ 1894 1.220 ad void 1895 1.220 ad uvm_pagelock2(struct vm_page *pg1, struct vm_page *pg2) 1896 1.220 ad { 1897 1.220 ad 1898 1.220 ad if (pg1 < pg2) { 1899 1.220 ad mutex_enter(&pg1->interlock); 1900 1.220 ad mutex_enter(&pg2->interlock); 1901 1.220 ad } else { 1902 1.220 ad mutex_enter(&pg2->interlock); 1903 1.220 ad mutex_enter(&pg1->interlock); 1904 1.220 ad } 1905 1.220 ad } 1906 1.220 ad 1907 1.220 ad /* 1908 1.220 ad * uvm_pageunlock: release page interlock, and if a page replacement intent 1909 1.220 ad * is set on the page, pass it to uvmpdpol to make real. 1910 1.230 skrll * 1911 1.220 ad * => caller must hold pg->interlock 1912 1.220 ad */ 1913 1.220 ad void 1914 1.220 ad uvm_pageunlock(struct vm_page *pg) 1915 1.220 ad { 1916 1.220 ad 1917 1.220 ad if ((pg->pqflags & PQ_INTENT_SET) == 0 || 1918 1.220 ad (pg->pqflags & PQ_INTENT_QUEUED) != 0) { 1919 1.220 ad mutex_exit(&pg->interlock); 1920 1.220 ad return; 1921 1.220 ad } 1922 1.220 ad pg->pqflags |= PQ_INTENT_QUEUED; 1923 1.220 ad mutex_exit(&pg->interlock); 1924 1.220 ad uvmpdpol_pagerealize(pg); 1925 1.220 ad } 1926 1.220 ad 1927 1.220 ad /* 1928 1.220 ad * uvm_pageunlock2: release two page interlocks, and for both pages if a 1929 1.220 ad * page replacement intent is set on the page, pass it to uvmpdpol to make 1930 1.220 ad * real. 1931 1.230 skrll * 1932 1.220 ad * => caller must hold pg->interlock 1933 1.220 ad */ 1934 1.220 ad void 1935 1.220 ad uvm_pageunlock2(struct vm_page *pg1, struct vm_page *pg2) 1936 1.220 ad { 1937 1.220 ad 1938 1.220 ad if ((pg1->pqflags & PQ_INTENT_SET) == 0 || 1939 1.220 ad (pg1->pqflags & PQ_INTENT_QUEUED) != 0) { 1940 1.220 ad mutex_exit(&pg1->interlock); 1941 1.220 ad pg1 = NULL; 1942 1.220 ad } else { 1943 1.220 ad pg1->pqflags |= PQ_INTENT_QUEUED; 1944 1.220 ad mutex_exit(&pg1->interlock); 1945 1.220 ad } 1946 1.220 ad 1947 1.220 ad if ((pg2->pqflags & PQ_INTENT_SET) == 0 || 1948 1.220 ad (pg2->pqflags & PQ_INTENT_QUEUED) != 0) { 1949 1.220 ad mutex_exit(&pg2->interlock); 1950 1.220 ad pg2 = NULL; 1951 1.220 ad } else { 1952 1.220 ad pg2->pqflags |= PQ_INTENT_QUEUED; 1953 1.220 ad mutex_exit(&pg2->interlock); 1954 1.220 ad } 1955 1.220 ad 1956 1.220 ad if (pg1 != NULL) { 1957 1.220 ad uvmpdpol_pagerealize(pg1); 1958 1.220 ad } 1959 1.220 ad if (pg2 != NULL) { 1960 1.220 ad uvmpdpol_pagerealize(pg2); 1961 1.220 ad } 1962 1.220 ad } 1963 1.220 ad 1964 1.220 ad /* 1965 1.110 yamt * uvm_pagezero: zero fill a page 1966 1.110 yamt * 1967 1.110 yamt * => if page is part of an object then the object should be locked 1968 1.110 yamt * to protect pg->flags. 
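 *
 * A minimal usage sketch (illustrative only; "uobj" and "off" are
 * assumed to come from the caller).  Note that the page is marked
 * dirty before it is zeroed, so for a freshly allocated page the
 * UVM_PGA_ZERO flag to uvm_pagealloc() may be preferable:
 *
 *	pg = uvm_pagelookup(uobj, off);
 *	if (pg != NULL)
 *		uvm_pagezero(pg);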
1969 1.110 yamt */ 1970 1.110 yamt 1971 1.110 yamt void 1972 1.110 yamt uvm_pagezero(struct vm_page *pg) 1973 1.110 yamt { 1974 1.224 ad 1975 1.224 ad uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); 1976 1.110 yamt pmap_zero_page(VM_PAGE_TO_PHYS(pg)); 1977 1.110 yamt } 1978 1.110 yamt 1979 1.110 yamt /* 1980 1.110 yamt * uvm_pagecopy: copy a page 1981 1.110 yamt * 1982 1.110 yamt * => if page is part of an object then the object should be locked 1983 1.110 yamt * to protect pg->flags. 1984 1.110 yamt */ 1985 1.110 yamt 1986 1.110 yamt void 1987 1.110 yamt uvm_pagecopy(struct vm_page *src, struct vm_page *dst) 1988 1.110 yamt { 1989 1.110 yamt 1990 1.224 ad uvm_pagemarkdirty(dst, UVM_PAGE_STATUS_DIRTY); 1991 1.110 yamt pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst)); 1992 1.110 yamt } 1993 1.110 yamt 1994 1.110 yamt /* 1995 1.150 thorpej * uvm_pageismanaged: test it see that a page (specified by PA) is managed. 1996 1.150 thorpej */ 1997 1.150 thorpej 1998 1.150 thorpej bool 1999 1.150 thorpej uvm_pageismanaged(paddr_t pa) 2000 1.150 thorpej { 2001 1.150 thorpej 2002 1.190 cherry return (uvm_physseg_find(atop(pa), NULL) != UVM_PHYSSEG_TYPE_INVALID); 2003 1.150 thorpej } 2004 1.150 thorpej 2005 1.150 thorpej /* 2006 1.110 yamt * uvm_page_lookup_freelist: look up the free list for the specified page 2007 1.110 yamt */ 2008 1.110 yamt 2009 1.110 yamt int 2010 1.110 yamt uvm_page_lookup_freelist(struct vm_page *pg) 2011 1.110 yamt { 2012 1.190 cherry uvm_physseg_t upm; 2013 1.110 yamt 2014 1.190 cherry upm = uvm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL); 2015 1.190 cherry KASSERT(upm != UVM_PHYSSEG_TYPE_INVALID); 2016 1.190 cherry return uvm_physseg_get_free_list(upm); 2017 1.110 yamt } 2018 1.151 thorpej 2019 1.174 rmind /* 2020 1.218 ad * uvm_page_owner_locked_p: return true if object associated with page is 2021 1.174 rmind * locked. this is a weak check for runtime assertions only. 2022 1.174 rmind */ 2023 1.174 rmind 2024 1.174 rmind bool 2025 1.226 ad uvm_page_owner_locked_p(struct vm_page *pg, bool exclusive) 2026 1.174 rmind { 2027 1.174 rmind 2028 1.174 rmind if (pg->uobject != NULL) { 2029 1.226 ad return exclusive 2030 1.226 ad ? rw_write_held(pg->uobject->vmobjlock) 2031 1.226 ad : rw_lock_held(pg->uobject->vmobjlock); 2032 1.174 rmind } 2033 1.174 rmind if (pg->uanon != NULL) { 2034 1.226 ad return exclusive 2035 1.226 ad ? rw_write_held(pg->uanon->an_lock) 2036 1.226 ad : rw_lock_held(pg->uanon->an_lock); 2037 1.174 rmind } 2038 1.174 rmind return true; 2039 1.174 rmind } 2040 1.174 rmind 2041 1.224 ad /* 2042 1.224 ad * uvm_pagereadonly_p: return if the page should be mapped read-only 2043 1.224 ad */ 2044 1.224 ad 2045 1.224 ad bool 2046 1.224 ad uvm_pagereadonly_p(struct vm_page *pg) 2047 1.224 ad { 2048 1.224 ad struct uvm_object * const uobj = pg->uobject; 2049 1.224 ad 2050 1.226 ad KASSERT(uobj == NULL || rw_lock_held(uobj->vmobjlock)); 2051 1.226 ad KASSERT(uobj != NULL || rw_lock_held(pg->uanon->an_lock)); 2052 1.224 ad if ((pg->flags & PG_RDONLY) != 0) { 2053 1.224 ad return true; 2054 1.224 ad } 2055 1.224 ad if (uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN) { 2056 1.224 ad return true; 2057 1.224 ad } 2058 1.224 ad if (uobj == NULL) { 2059 1.224 ad return false; 2060 1.224 ad } 2061 1.224 ad return UVM_OBJ_NEEDS_WRITEFAULT(uobj); 2062 1.224 ad } 2063 1.224 ad 2064 1.198 jdolecek #ifdef PMAP_DIRECT 2065 1.198 jdolecek /* 2066 1.198 jdolecek * Call pmap to translate physical address into a virtual and to run a callback 2067 1.198 jdolecek * for it. 
Used to avoid actually mapping the pages, pmap most likely uses direct map 2068 1.198 jdolecek * or equivalent. 2069 1.198 jdolecek */ 2070 1.198 jdolecek int 2071 1.198 jdolecek uvm_direct_process(struct vm_page **pgs, u_int npages, voff_t off, vsize_t len, 2072 1.198 jdolecek int (*process)(void *, size_t, void *), void *arg) 2073 1.198 jdolecek { 2074 1.198 jdolecek int error = 0; 2075 1.198 jdolecek paddr_t pa; 2076 1.198 jdolecek size_t todo; 2077 1.198 jdolecek voff_t pgoff = (off & PAGE_MASK); 2078 1.198 jdolecek struct vm_page *pg; 2079 1.198 jdolecek 2080 1.252 riastrad KASSERT(npages > 0); 2081 1.252 riastrad KASSERT(len > 0); 2082 1.198 jdolecek 2083 1.198 jdolecek for (int i = 0; i < npages; i++) { 2084 1.198 jdolecek pg = pgs[i]; 2085 1.198 jdolecek 2086 1.198 jdolecek KASSERT(len > 0); 2087 1.198 jdolecek 2088 1.198 jdolecek /* 2089 1.198 jdolecek * Caller is responsible for ensuring all the pages are 2090 1.198 jdolecek * available. 2091 1.198 jdolecek */ 2092 1.252 riastrad KASSERT(pg != NULL); 2093 1.252 riastrad KASSERT(pg != PGO_DONTCARE); 2094 1.198 jdolecek 2095 1.198 jdolecek pa = VM_PAGE_TO_PHYS(pg); 2096 1.198 jdolecek todo = MIN(len, PAGE_SIZE - pgoff); 2097 1.198 jdolecek 2098 1.198 jdolecek error = pmap_direct_process(pa, pgoff, todo, process, arg); 2099 1.198 jdolecek if (error) 2100 1.198 jdolecek break; 2101 1.198 jdolecek 2102 1.198 jdolecek pgoff = 0; 2103 1.198 jdolecek len -= todo; 2104 1.198 jdolecek } 2105 1.198 jdolecek 2106 1.198 jdolecek KASSERTMSG(error != 0 || len == 0, "len %lu != 0 for non-error", len); 2107 1.198 jdolecek return error; 2108 1.198 jdolecek } 2109 1.198 jdolecek #endif /* PMAP_DIRECT */ 2110 1.198 jdolecek 2111 1.151 thorpej #if defined(DDB) || defined(DEBUGPRINT) 2112 1.151 thorpej 2113 1.151 thorpej /* 2114 1.151 thorpej * uvm_page_printit: actually print the page 2115 1.151 thorpej */ 2116 1.151 thorpej 2117 1.151 thorpej static const char page_flagbits[] = UVM_PGFLAGBITS; 2118 1.225 ad static const char page_pqflagbits[] = UVM_PQFLAGBITS; 2119 1.151 thorpej 2120 1.151 thorpej void 2121 1.151 thorpej uvm_page_printit(struct vm_page *pg, bool full, 2122 1.151 thorpej void (*pr)(const char *, ...)) 2123 1.151 thorpej { 2124 1.151 thorpej struct vm_page *tpg; 2125 1.151 thorpej struct uvm_object *uobj; 2126 1.213 ad struct pgflbucket *pgb; 2127 1.151 thorpej struct pgflist *pgl; 2128 1.151 thorpej char pgbuf[128]; 2129 1.151 thorpej 2130 1.151 thorpej (*pr)("PAGE %p:\n", pg); 2131 1.151 thorpej snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags); 2132 1.225 ad (*pr)(" flags=%s\n", pgbuf); 2133 1.225 ad snprintb(pgbuf, sizeof(pgbuf), page_pqflagbits, pg->pqflags); 2134 1.225 ad (*pr)(" pqflags=%s\n", pgbuf); 2135 1.225 ad (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n", 2136 1.225 ad pg->uobject, pg->uanon, (long long)pg->offset); 2137 1.225 ad (*pr)(" loan_count=%d wire_count=%d bucket=%d freelist=%d\n", 2138 1.225 ad pg->loan_count, pg->wire_count, uvm_page_get_bucket(pg), 2139 1.225 ad uvm_page_get_freelist(pg)); 2140 1.225 ad (*pr)(" pa=0x%lx\n", (long)VM_PAGE_TO_PHYS(pg)); 2141 1.151 thorpej #if defined(UVM_PAGE_TRKOWN) 2142 1.151 thorpej if (pg->flags & PG_BUSY) 2143 1.237 ad (*pr)(" owning process = %d.%d, tag=%s\n", 2144 1.237 ad pg->owner, pg->lowner, pg->owner_tag); 2145 1.151 thorpej else 2146 1.151 thorpej (*pr)(" page not busy, no owner\n"); 2147 1.151 thorpej #else 2148 1.151 thorpej (*pr)(" [page ownership tracking disabled]\n"); 2149 1.151 thorpej #endif 2150 1.151 thorpej 2151 1.151 thorpej if (!full) 2152 1.151 
thorpej return; 2153 1.151 thorpej 2154 1.151 thorpej /* cross-verify object/anon */ 2155 1.201 ad if ((pg->flags & PG_FREE) == 0) { 2156 1.201 ad if (pg->flags & PG_ANON) { 2157 1.151 thorpej if (pg->uanon == NULL || pg->uanon->an_page != pg) 2158 1.151 thorpej (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", 2159 1.151 thorpej (pg->uanon) ? pg->uanon->an_page : NULL); 2160 1.151 thorpej else 2161 1.151 thorpej (*pr)(" anon backpointer is OK\n"); 2162 1.151 thorpej } else { 2163 1.151 thorpej uobj = pg->uobject; 2164 1.151 thorpej if (uobj) { 2165 1.151 thorpej (*pr)(" checking object list\n"); 2166 1.203 ad tpg = uvm_pagelookup(uobj, pg->offset); 2167 1.151 thorpej if (tpg) 2168 1.151 thorpej (*pr)(" page found on object list\n"); 2169 1.151 thorpej else 2170 1.151 thorpej (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n"); 2171 1.151 thorpej } 2172 1.151 thorpej } 2173 1.151 thorpej } 2174 1.151 thorpej 2175 1.151 thorpej /* cross-verify page queue */ 2176 1.201 ad if (pg->flags & PG_FREE) { 2177 1.209 ad int fl = uvm_page_get_freelist(pg); 2178 1.213 ad int b = uvm_page_get_bucket(pg); 2179 1.213 ad pgb = uvm.page_free[fl].pgfl_buckets[b]; 2180 1.213 ad pgl = &pgb->pgb_colors[VM_PGCOLOR(pg)]; 2181 1.151 thorpej (*pr)(" checking pageq list\n"); 2182 1.151 thorpej LIST_FOREACH(tpg, pgl, pageq.list) { 2183 1.151 thorpej if (tpg == pg) { 2184 1.151 thorpej break; 2185 1.151 thorpej } 2186 1.151 thorpej } 2187 1.151 thorpej if (tpg) 2188 1.151 thorpej (*pr)(" page found on pageq list\n"); 2189 1.151 thorpej else 2190 1.151 thorpej (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); 2191 1.151 thorpej } 2192 1.151 thorpej } 2193 1.151 thorpej 2194 1.151 thorpej /* 2195 1.201 ad * uvm_page_printall - print a summary of all managed pages 2196 1.151 thorpej */ 2197 1.151 thorpej 2198 1.151 thorpej void 2199 1.151 thorpej uvm_page_printall(void (*pr)(const char *, ...)) 2200 1.151 thorpej { 2201 1.190 cherry uvm_physseg_t i; 2202 1.190 cherry paddr_t pfn; 2203 1.151 thorpej struct vm_page *pg; 2204 1.151 thorpej 2205 1.151 thorpej (*pr)("%18s %4s %4s %18s %18s" 2206 1.151 thorpej #ifdef UVM_PAGE_TRKOWN 2207 1.151 thorpej " OWNER" 2208 1.151 thorpej #endif 2209 1.151 thorpej "\n", "PAGE", "FLAG", "PQ", "UOBJECT", "UANON"); 2210 1.190 cherry for (i = uvm_physseg_get_first(); 2211 1.190 cherry uvm_physseg_valid_p(i); 2212 1.190 cherry i = uvm_physseg_get_next(i)) { 2213 1.190 cherry for (pfn = uvm_physseg_get_start(i); 2214 1.192 maya pfn < uvm_physseg_get_end(i); 2215 1.190 cherry pfn++) { 2216 1.190 cherry pg = PHYS_TO_VM_PAGE(ptoa(pfn)); 2217 1.190 cherry 2218 1.201 ad (*pr)("%18p %04x %08x %18p %18p", 2219 1.151 thorpej pg, pg->flags, pg->pqflags, pg->uobject, 2220 1.151 thorpej pg->uanon); 2221 1.151 thorpej #ifdef UVM_PAGE_TRKOWN 2222 1.151 thorpej if (pg->flags & PG_BUSY) 2223 1.151 thorpej (*pr)(" %d [%s]", pg->owner, pg->owner_tag); 2224 1.151 thorpej #endif 2225 1.151 thorpej (*pr)("\n"); 2226 1.151 thorpej } 2227 1.151 thorpej } 2228 1.151 thorpej } 2229 1.151 thorpej 2230 1.213 ad /* 2231 1.213 ad * uvm_page_print_freelists - print a summary freelists 2232 1.213 ad */ 2233 1.213 ad 2234 1.213 ad void 2235 1.213 ad uvm_page_print_freelists(void (*pr)(const char *, ...)) 2236 1.213 ad { 2237 1.213 ad struct pgfreelist *pgfl; 2238 1.213 ad struct pgflbucket *pgb; 2239 1.213 ad int fl, b, c; 2240 1.213 ad 2241 1.213 ad (*pr)("There are %d freelists with %d buckets of %d colors.\n\n", 2242 1.213 ad VM_NFREELIST, uvm.bucketcount, uvmexp.ncolors); 2243 1.230 skrll 2244 1.213 ad for (fl = 0; fl 
< VM_NFREELIST; fl++) { 2245 1.213 ad pgfl = &uvm.page_free[fl]; 2246 1.213 ad (*pr)("freelist(%d) @ %p\n", fl, pgfl); 2247 1.213 ad for (b = 0; b < uvm.bucketcount; b++) { 2248 1.213 ad pgb = uvm.page_free[fl].pgfl_buckets[b]; 2249 1.213 ad (*pr)(" bucket(%d) @ %p, nfree = %d, lock @ %p:\n", 2250 1.213 ad b, pgb, pgb->pgb_nfree, 2251 1.213 ad &uvm_freelist_locks[b].lock); 2252 1.213 ad for (c = 0; c < uvmexp.ncolors; c++) { 2253 1.213 ad (*pr)(" color(%d) @ %p, ", c, 2254 1.213 ad &pgb->pgb_colors[c]); 2255 1.213 ad (*pr)("first page = %p\n", 2256 1.213 ad LIST_FIRST(&pgb->pgb_colors[c])); 2257 1.213 ad } 2258 1.213 ad } 2259 1.213 ad } 2260 1.213 ad } 2261 1.213 ad 2262 1.151 thorpej #endif /* DDB || DEBUGPRINT */ 2263
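/*
 * Illustrative sketch only: one common pattern for waiting on a busy
 * page.  Because uvm_pagewait() releases the object lock before
 * sleeping, the caller re-takes the lock and looks the page up again.
 * "uobj", "off" and the "pgwait" wait message are assumptions made
 * for the example:
 *
 *	struct vm_page *pg;
 *
 *	rw_enter(uobj->vmobjlock, RW_WRITER);
 *	while ((pg = uvm_pagelookup(uobj, off)) != NULL &&
 *	    (pg->flags & PG_BUSY) != 0) {
 *		uvm_pagewait(pg, uobj->vmobjlock, "pgwait");
 *		rw_enter(uobj->vmobjlock, RW_WRITER);
 *	}
 */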