1 /* $NetBSD: uvm_page.c,v 1.213 2019/12/27 12:51:57 ad Exp $ */
2
3 /*-
4 * Copyright (c) 2019 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1997 Charles D. Cranor and Washington University.
34 * Copyright (c) 1991, 1993, The Regents of the University of California.
35 *
36 * All rights reserved.
37 *
38 * This code is derived from software contributed to Berkeley by
39 * The Mach Operating System project at Carnegie-Mellon University.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * @(#)vm_page.c 8.3 (Berkeley) 3/21/94
66 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
67 *
68 *
69 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
70 * All rights reserved.
71 *
72 * Permission to use, copy, modify and distribute this software and
73 * its documentation is hereby granted, provided that both the copyright
74 * notice and this permission notice appear in all copies of the
75 * software, derivative works or modified versions, and any portions
76 * thereof, and that both notices appear in supporting documentation.
77 *
78 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
79 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
80 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
81 *
82 * Carnegie Mellon requests users of this software to return to
83 *
84 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
85 * School of Computer Science
86 * Carnegie Mellon University
87 * Pittsburgh PA 15213-3890
88 *
89 * any improvements or extensions that they make and grant Carnegie the
90 * rights to redistribute these changes.
91 */
92
93 /*
94 * uvm_page.c: page ops.
95 */
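/*
 * A rough map of this file: bootstrap (uvm_page_init, uvm_pageboot_alloc,
 * uvm_page_physget), free list and bucket management (uvm_page_recolor,
 * uvm_page_rebucket, uvm_page_redim), allocation and freeing
 * (uvm_pagealloc_strat, uvm_pagefree), and the smaller per-page helpers
 * (wire/unwire, activate/deactivate, lookup, debug printing).
 */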
96
97 #include <sys/cdefs.h>
98 __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.213 2019/12/27 12:51:57 ad Exp $");
99
100 #include "opt_ddb.h"
101 #include "opt_uvm.h"
102 #include "opt_uvmhist.h"
103 #include "opt_readahead.h"
104
105 #include <sys/param.h>
106 #include <sys/systm.h>
107 #include <sys/sched.h>
108 #include <sys/kernel.h>
109 #include <sys/vnode.h>
110 #include <sys/proc.h>
111 #include <sys/radixtree.h>
112 #include <sys/atomic.h>
113 #include <sys/cpu.h>
114 #include <sys/extent.h>
115
116 #include <uvm/uvm.h>
117 #include <uvm/uvm_ddb.h>
118 #include <uvm/uvm_pdpolicy.h>
119 #include <uvm/uvm_pgflcache.h>
120
121 /*
122 * Some supported CPUs in a given architecture don't support all
123 * of the things necessary to do idle page zero'ing efficiently.
124 * We therefore provide a way to enable it from machdep code here.
125 */
126 bool vm_page_zero_enable = false;
127
128 /*
129 * number of pages per-CPU to reserve for the kernel.
130 */
131 #ifndef UVM_RESERVED_PAGES_PER_CPU
132 #define UVM_RESERVED_PAGES_PER_CPU 5
133 #endif
134 int vm_page_reserve_kernel = UVM_RESERVED_PAGES_PER_CPU;
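/*
 * For example, with the default of 5 above, an 8-CPU machine ends up with
 * uvmexp.reserve_kernel == 40: uvm_page_init() accounts for the boot CPU
 * and uvm_cpu_attach() adds another share for each additional CPU.
 */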
135
136 /*
137 * physical memory size.
138 */
139 psize_t physmem;
140
141 /*
142 * local variables
143 */
144
145 /*
146 * these variables record the virtual space range returned by
147 * pmap_virtual_space(), for debugging purposes. The implementation
148 * of uvm_pageboot_alloc() here also uses them internally.
149 */
150
151 static vaddr_t virtual_space_start;
152 static vaddr_t virtual_space_end;
153
154 /*
155 * we allocate an initial number of page colors in uvm_page_init(),
156 * and remember them. We may re-color pages as cache sizes are
157 * discovered during the autoconfiguration phase. But we can never
158 * free the initial set of buckets, since they are allocated using
159 * uvm_pageboot_alloc().
160 */
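/*
 * For reference, a page's color is derived from its physical address:
 * VM_PGCOLOR() in uvm_page.h boils down to something like
 *
 *	atop(VM_PAGE_TO_PHYS(pg)) & uvmexp.colormask
 *
 * so consecutive physical pages cycle through the colors, spreading them
 * across cache sets.
 */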
161
162 static size_t recolored_pages_memsize /* = 0 */;
163 static char *recolored_pages_mem;
164
165 /*
166 * freelist locks - one per bucket.
167 */
168
169 union uvm_freelist_lock uvm_freelist_locks[PGFL_MAX_BUCKETS]
170 __cacheline_aligned;
171
172 /*
173 * basic NUMA information.
174 */
175
176 static struct uvm_page_numa_region {
177 struct uvm_page_numa_region *next;
178 paddr_t start;
179 paddr_t size;
180 u_int numa_id;
181 } *uvm_page_numa_region;
182
183 #ifdef DEBUG
184 vaddr_t uvm_zerocheckkva;
185 #endif /* DEBUG */
186
187 /*
188 * These functions are reserved for uvm(9) internal use and are not
189 * exported in the header file uvm_physseg.h
190 *
191 * Thus they are declared again here.
192 */
193 void uvm_physseg_init_seg(uvm_physseg_t, struct vm_page *);
194 void uvm_physseg_seg_chomp_slab(uvm_physseg_t, struct vm_page *, size_t);
195
196 /* returns a pgs array */
197 struct vm_page *uvm_physseg_seg_alloc_from_slab(uvm_physseg_t, size_t);
198
199 /*
200 * inline functions
201 */
202
203 /*
204 * uvm_pageinsert: insert a page in the object.
205 *
206 * => caller must lock object
207 * => caller should have already set pg's object and offset pointers
208 * and bumped the version counter
209 */
210
211 static inline void
212 uvm_pageinsert_object(struct uvm_object *uobj, struct vm_page *pg)
213 {
214
215 KASSERT(uobj == pg->uobject);
216 KASSERT(mutex_owned(uobj->vmobjlock));
217 KASSERT((pg->flags & PG_TABLED) == 0);
218
219 if (UVM_OBJ_IS_VNODE(uobj)) {
220 if (uobj->uo_npages == 0) {
221 struct vnode *vp = (struct vnode *)uobj;
222
223 vholdl(vp);
224 }
225 if (UVM_OBJ_IS_VTEXT(uobj)) {
226 cpu_count(CPU_COUNT_EXECPAGES, 1);
227 } else {
228 cpu_count(CPU_COUNT_FILEPAGES, 1);
229 }
230 } else if (UVM_OBJ_IS_AOBJ(uobj)) {
231 cpu_count(CPU_COUNT_ANONPAGES, 1);
232 }
233 pg->flags |= PG_TABLED;
234 uobj->uo_npages++;
235 }
236
237 static inline int
238 uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg)
239 {
240 const uint64_t idx = pg->offset >> PAGE_SHIFT;
241 int error;
242
243 error = radix_tree_insert_node(&uobj->uo_pages, idx, pg);
244 if (error != 0) {
245 return error;
246 }
247 return 0;
248 }
249
250 /*
251 * uvm_page_remove: remove page from object.
252 *
253 * => caller must lock object
254 */
255
256 static inline void
257 uvm_pageremove_object(struct uvm_object *uobj, struct vm_page *pg)
258 {
259
260 KASSERT(uobj == pg->uobject);
261 KASSERT(mutex_owned(uobj->vmobjlock));
262 KASSERT(pg->flags & PG_TABLED);
263
264 if (UVM_OBJ_IS_VNODE(uobj)) {
265 if (uobj->uo_npages == 1) {
266 struct vnode *vp = (struct vnode *)uobj;
267
268 holdrelel(vp);
269 }
270 if (UVM_OBJ_IS_VTEXT(uobj)) {
271 cpu_count(CPU_COUNT_EXECPAGES, -1);
272 } else {
273 cpu_count(CPU_COUNT_FILEPAGES, -1);
274 }
275 } else if (UVM_OBJ_IS_AOBJ(uobj)) {
276 cpu_count(CPU_COUNT_ANONPAGES, -1);
277 }
278
279 /* object should be locked */
280 uobj->uo_npages--;
281 pg->flags &= ~PG_TABLED;
282 pg->uobject = NULL;
283 }
284
285 static inline void
286 uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg)
287 {
288 struct vm_page *opg __unused;
289
290 opg = radix_tree_remove_node(&uobj->uo_pages, pg->offset >> PAGE_SHIFT);
291 KASSERT(pg == opg);
292 }
293
294 static void
295 uvm_page_init_bucket(struct pgfreelist *pgfl, struct pgflbucket *pgb, int num)
296 {
297 int i;
298
299 pgb->pgb_nfree = 0;
300 for (i = 0; i < uvmexp.ncolors; i++) {
301 LIST_INIT(&pgb->pgb_colors[i]);
302 }
303 pgfl->pgfl_buckets[num] = pgb;
304 }
305
306 /*
307 * uvm_page_init: init the page system. called from uvm_init().
308 *
309 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
310 */
311
312 void
313 uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
314 {
315 static struct uvm_cpu boot_cpu __cacheline_aligned;
316 psize_t freepages, pagecount, bucketsize, n;
317 struct pgflbucket *pgb;
318 struct vm_page *pagearray;
319 char *bucketarray;
320 uvm_physseg_t bank;
321 int fl, b;
322
323 KASSERT(ncpu <= 1);
324
325 /*
326 * init the page queues and free page queue locks, except the
327 * free list; we allocate that later (with the initial vm_page
328 * structures).
329 */
330
331 uvm.cpus[0] = &boot_cpu;
332 curcpu()->ci_data.cpu_uvm = &boot_cpu;
333 uvmpdpol_init();
334 for (b = 0; b < __arraycount(uvm_freelist_locks); b++) {
335 mutex_init(&uvm_freelist_locks[b].lock, MUTEX_DEFAULT, IPL_VM);
336 }
337
338 /*
339 * allocate vm_page structures.
340 */
341
342 /*
343 * sanity check:
344 * before calling this function the MD code is expected to register
345 * some free RAM with the uvm_page_physload() function. our job
346 * now is to allocate vm_page structures for this memory.
347 */
348
349 if (uvm_physseg_get_last() == UVM_PHYSSEG_TYPE_INVALID)
350 panic("uvm_page_bootstrap: no memory pre-allocated");
351
352 /*
353 * first calculate the number of free pages...
354 *
355 * note that we use start/end rather than avail_start/avail_end.
356 * this allows us to allocate extra vm_page structures in case we
357 * want to return some memory to the pool after booting.
358 */
359
360 freepages = 0;
361
362 for (bank = uvm_physseg_get_first();
363 uvm_physseg_valid_p(bank) ;
364 bank = uvm_physseg_get_next(bank)) {
365 freepages += (uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank));
366 }
367
368 /*
369 * Let MD code initialize the number of colors, or default
370 * to 1 color if MD code doesn't care.
371 */
372 if (uvmexp.ncolors == 0)
373 uvmexp.ncolors = 1;
374 uvmexp.colormask = uvmexp.ncolors - 1;
375 KASSERT((uvmexp.colormask & uvmexp.ncolors) == 0);
376
377 /* We always start with only 1 bucket. */
378 uvm.bucketcount = 1;
379
380 /*
381 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
382 * use. for each page of memory we use we need a vm_page structure.
383 * thus, the total number of pages we can use is the total size of
384 * the memory divided by the PAGE_SIZE plus the size of the vm_page
385 * structure. we add one to freepages as a fudge factor to avoid
386 * truncation errors (since we can only allocate in terms of whole
387 * pages).
388 */
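/*
 * As a worked example: assuming 4kB pages and, say, a 128 byte
 * struct vm_page, 1GB of free RAM gives freepages = 262144, and
 * pagecount = (262145 * 4096) / (4096 + 128) is about 254200 usable
 * pages, with the difference consumed by the vm_page array itself.
 */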
389 pagecount = ((freepages + 1) << PAGE_SHIFT) /
390 (PAGE_SIZE + sizeof(struct vm_page));
391 bucketsize = offsetof(struct pgflbucket, pgb_colors[uvmexp.ncolors]);
392 bucketsize = roundup2(bucketsize, coherency_unit);
393 bucketarray = (void *)uvm_pageboot_alloc(
394 bucketsize * VM_NFREELIST +
395 pagecount * sizeof(struct vm_page));
396 pagearray = (struct vm_page *)
397 (bucketarray + bucketsize * VM_NFREELIST);
398
399 for (fl = 0; fl < VM_NFREELIST; fl++) {
400 pgb = (struct pgflbucket *)(bucketarray + bucketsize * fl);
401 uvm_page_init_bucket(&uvm.page_free[fl], pgb, 0);
402 }
403 memset(pagearray, 0, pagecount * sizeof(struct vm_page));
404
405 /*
406 * init the freelist cache in the disabled state.
407 */
408 uvm_pgflcache_init();
409
410 /*
411 * init the vm_page structures and put them in the correct place.
412 */
413 /* First init the extent */
414
415 for (bank = uvm_physseg_get_first(),
416 uvm_physseg_seg_chomp_slab(bank, pagearray, pagecount);
417 uvm_physseg_valid_p(bank);
418 bank = uvm_physseg_get_next(bank)) {
419
420 n = uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank);
421 uvm_physseg_seg_alloc_from_slab(bank, n);
422 uvm_physseg_init_seg(bank, pagearray);
423
424 /* set up page array pointers */
425 pagearray += n;
426 pagecount -= n;
427 }
428
429 /*
430 * pass up the values of virtual_space_start and
431 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
432 * layers of the VM.
433 */
434
435 *kvm_startp = round_page(virtual_space_start);
436 *kvm_endp = trunc_page(virtual_space_end);
437 #ifdef DEBUG
438 /*
439 * steal kva for uvm_pagezerocheck().
440 */
441 uvm_zerocheckkva = *kvm_startp;
442 *kvm_startp += PAGE_SIZE;
443 #endif /* DEBUG */
444
445 /*
446 * init various thresholds.
447 */
448
449 uvmexp.reserve_pagedaemon = 1;
450 uvmexp.reserve_kernel = vm_page_reserve_kernel;
451
452 /*
453 * done!
454 */
455
456 uvm.page_init_done = true;
457 }
458
459 /*
460 * uvm_pgfl_lock: lock all freelist buckets
461 */
462
463 void
464 uvm_pgfl_lock(void)
465 {
466 int i;
467
468 for (i = 0; i < __arraycount(uvm_freelist_locks); i++) {
469 mutex_spin_enter(&uvm_freelist_locks[i].lock);
470 }
471 }
472
473 /*
474 * uvm_pgfl_unlock: unlock all freelist buckets
475 */
476
477 void
478 uvm_pgfl_unlock(void)
479 {
480 int i;
481
482 for (i = 0; i < __arraycount(uvm_freelist_locks); i++) {
483 mutex_spin_exit(&uvm_freelist_locks[i].lock);
484 }
485 }
486
487 /*
488 * uvm_setpagesize: set the page size
489 *
490 * => sets page_shift and page_mask from uvmexp.pagesize.
491 */
492
493 void
494 uvm_setpagesize(void)
495 {
496
497 /*
498 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE
499 * to be a constant (indicated by being a non-zero value).
500 */
501 if (uvmexp.pagesize == 0) {
502 if (PAGE_SIZE == 0)
503 panic("uvm_setpagesize: uvmexp.pagesize not set");
504 uvmexp.pagesize = PAGE_SIZE;
505 }
506 uvmexp.pagemask = uvmexp.pagesize - 1;
507 if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
508 panic("uvm_setpagesize: page size %u (%#x) not a power of two",
509 uvmexp.pagesize, uvmexp.pagesize);
510 for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
511 if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
512 break;
513 }
514
515 /*
516 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
517 */
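/*
 * Only usable during bootstrap, before uvm.page_init_done is set; for
 * example, uvm_page_init() above uses it to carve out the initial
 * free-list buckets and the vm_page array. The returned kernel VA is
 * already backed by physical memory that is never returned to the
 * free pool.
 */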
518
519 vaddr_t
520 uvm_pageboot_alloc(vsize_t size)
521 {
522 static bool initialized = false;
523 vaddr_t addr;
524 #if !defined(PMAP_STEAL_MEMORY)
525 vaddr_t vaddr;
526 paddr_t paddr;
527 #endif
528
529 /*
530 * on first call to this function, initialize ourselves.
531 */
532 if (initialized == false) {
533 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
534
535 /* round it the way we like it */
536 virtual_space_start = round_page(virtual_space_start);
537 virtual_space_end = trunc_page(virtual_space_end);
538
539 initialized = true;
540 }
541
542 /* round to page size */
543 size = round_page(size);
544 uvmexp.bootpages += atop(size);
545
546 #if defined(PMAP_STEAL_MEMORY)
547
548 /*
549 * defer bootstrap allocation to MD code (it may want to allocate
550 * from a direct-mapped segment). pmap_steal_memory should adjust
551 * virtual_space_start/virtual_space_end if necessary.
552 */
553
554 addr = pmap_steal_memory(size, &virtual_space_start,
555 &virtual_space_end);
556
557 return(addr);
558
559 #else /* !PMAP_STEAL_MEMORY */
560
561 /*
562 * allocate virtual memory for this request
563 */
564 if (virtual_space_start == virtual_space_end ||
565 (virtual_space_end - virtual_space_start) < size)
566 panic("uvm_pageboot_alloc: out of virtual space");
567
568 addr = virtual_space_start;
569
570 #ifdef PMAP_GROWKERNEL
571 /*
572 * If the kernel pmap can't map the requested space,
573 * then allocate more resources for it.
574 */
575 if (uvm_maxkaddr < (addr + size)) {
576 uvm_maxkaddr = pmap_growkernel(addr + size);
577 if (uvm_maxkaddr < (addr + size))
578 panic("uvm_pageboot_alloc: pmap_growkernel() failed");
579 }
580 #endif
581
582 virtual_space_start += size;
583
584 /*
585 * allocate and mapin physical pages to back new virtual pages
586 */
587
588 for (vaddr = round_page(addr) ; vaddr < addr + size ;
589 vaddr += PAGE_SIZE) {
590
591 if (!uvm_page_physget(&paddr))
592 panic("uvm_pageboot_alloc: out of memory");
593
594 /*
595 * Note this memory is no longer managed, so using
596 * pmap_kenter is safe.
597 */
598 pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
599 }
600 pmap_update(pmap_kernel());
601 return(addr);
602 #endif /* PMAP_STEAL_MEMORY */
603 }
604
605 #if !defined(PMAP_STEAL_MEMORY)
606 /*
607 * uvm_page_physget: "steal" one page from the vm_physmem structure.
608 *
609 * => attempt to allocate it off the end of a segment in which the "avail"
610 * values match the start/end values. if we can't do that, then we
611 * will advance both values (making them equal, and removing some
612 * vm_page structures from the non-avail area).
613 * => return false if out of memory.
614 */
615
616 /* subroutine: try to allocate from memory chunks on the specified freelist */
617 static bool uvm_page_physget_freelist(paddr_t *, int);
618
619 static bool
620 uvm_page_physget_freelist(paddr_t *paddrp, int freelist)
621 {
622 uvm_physseg_t lcv;
623
624 /* pass 1: try allocating from a matching end */
625 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
626 for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv))
627 #else
628 for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv))
629 #endif
630 {
631 if (uvm.page_init_done == true)
632 panic("uvm_page_physget: called _after_ bootstrap");
633
634 /* Try to match at front or back on unused segment */
635 if (uvm_page_physunload(lcv, freelist, paddrp))
636 return true;
637 }
638
639 /* pass 2: forget about matching ends, just allocate something */
640 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
641 for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv))
642 #else
643 for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv))
644 #endif
645 {
646 /* Try the front regardless. */
647 if (uvm_page_physunload_force(lcv, freelist, paddrp))
648 return true;
649 }
650 return false;
651 }
652
653 bool
654 uvm_page_physget(paddr_t *paddrp)
655 {
656 int i;
657
658 /* try in the order of freelist preference */
659 for (i = 0; i < VM_NFREELIST; i++)
660 if (uvm_page_physget_freelist(paddrp, i) == true)
661 return (true);
662 return (false);
663 }
664 #endif /* PMAP_STEAL_MEMORY */
665
666 /*
667 * PHYS_TO_VM_PAGE: find vm_page for a PA. used by MI code to get vm_pages
668 * back from an I/O mapping (ugh!). used in some MD code as well.
669 */
670 struct vm_page *
671 uvm_phys_to_vm_page(paddr_t pa)
672 {
673 paddr_t pf = atop(pa);
674 paddr_t off;
675 uvm_physseg_t upm;
676
677 upm = uvm_physseg_find(pf, &off);
678 if (upm != UVM_PHYSSEG_TYPE_INVALID)
679 return uvm_physseg_get_pg(upm, off);
680 return(NULL);
681 }
682
683 paddr_t
684 uvm_vm_page_to_phys(const struct vm_page *pg)
685 {
686
687 return pg->phys_addr & ~(PAGE_SIZE - 1);
688 }
689
690 /*
691 * uvm_page_numa_load: load NUMA range description.
692 */
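/*
 * Expected to be called from MD boot code (for example a firmware/ACPI
 * affinity-table parser), once per physical memory range and before
 * uvm_page_rebucket() runs, along the lines of
 *
 *	uvm_page_numa_load(seg_start, seg_len, nodeid);
 *
 * where the argument names here are only illustrative.
 */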
693 void
694 uvm_page_numa_load(paddr_t start, paddr_t size, u_int numa_id)
695 {
696 struct uvm_page_numa_region *d;
697
698 KASSERT(numa_id < PGFL_MAX_BUCKETS);
699
700 d = kmem_alloc(sizeof(*d), KM_SLEEP);
701 d->start = start;
702 d->size = size;
703 d->numa_id = numa_id;
704 d->next = uvm_page_numa_region;
705 uvm_page_numa_region = d;
706 }
707
708 /*
709 * uvm_page_numa_lookup: lookup NUMA node for the given page.
710 */
711 static u_int
712 uvm_page_numa_lookup(struct vm_page *pg)
713 {
714 struct uvm_page_numa_region *d;
715 static bool warned;
716 paddr_t pa;
717
718 KASSERT(uvm.numa_alloc);
719 KASSERT(uvm_page_numa_region != NULL);
720
721 pa = VM_PAGE_TO_PHYS(pg);
722 for (d = uvm_page_numa_region; d != NULL; d = d->next) {
723 if (pa >= d->start && pa < d->start + d->size) {
724 return d->numa_id;
725 }
726 }
727
728 if (!warned) {
729 printf("uvm_page_numa_lookup: failed, first pg=%p pa=%p\n",
730 pg, (void *)VM_PAGE_TO_PHYS(pg));
731 warned = true;
732 }
733
734 return 0;
735 }
736
737 /*
738 * uvm_page_redim: adjust freelist dimensions if they have changed.
739 */
740
741 static void
742 uvm_page_redim(int newncolors, int newnbuckets)
743 {
744 struct pgfreelist npgfl;
745 struct pgflbucket *opgb, *npgb;
746 struct pgflist *ohead, *nhead;
747 struct vm_page *pg;
748 size_t bucketsize, bucketmemsize, oldbucketmemsize;
749 int fl, ob, oc, nb, nc, obuckets, ocolors;
750 char *bucketarray, *oldbucketmem, *bucketmem;
751
752 KASSERT(((newncolors - 1) & newncolors) == 0);
753
754 /* Anything to do? */
755 if (newncolors <= uvmexp.ncolors &&
756 newnbuckets == uvm.bucketcount) {
757 return;
758 }
759 if (uvm.page_init_done == false) {
760 uvmexp.ncolors = newncolors;
761 return;
762 }
763
764 bucketsize = offsetof(struct pgflbucket, pgb_colors[newncolors]);
765 bucketsize = roundup2(bucketsize, coherency_unit);
766 bucketmemsize = bucketsize * newnbuckets * VM_NFREELIST +
767 coherency_unit - 1;
768 bucketmem = kmem_zalloc(bucketmemsize, KM_SLEEP);
769 bucketarray = (char *)roundup2((uintptr_t)bucketmem, coherency_unit);
770
771 ocolors = uvmexp.ncolors;
772 obuckets = uvm.bucketcount;
773
774 /* Freelist cache mustn't be enabled. */
775 uvm_pgflcache_pause();
776
777 /* Make sure we should still do this. */
778 uvm_pgfl_lock();
779 if (newncolors <= uvmexp.ncolors &&
780 newnbuckets == uvm.bucketcount) {
781 uvm_pgfl_unlock();
782 kmem_free(bucketmem, bucketmemsize);
783 return;
784 }
785
786 uvmexp.ncolors = newncolors;
787 uvmexp.colormask = uvmexp.ncolors - 1;
788 uvm.bucketcount = newnbuckets;
789
790 for (fl = 0; fl < VM_NFREELIST; fl++) {
791 /* Init new buckets in new freelist. */
792 memset(&npgfl, 0, sizeof(npgfl));
793 for (nb = 0; nb < newnbuckets; nb++) {
794 npgb = (struct pgflbucket *)bucketarray;
795 uvm_page_init_bucket(&npgfl, npgb, nb);
796 bucketarray += bucketsize;
797 }
798 /* Now transfer pages from the old freelist. */
799 for (nb = ob = 0; ob < obuckets; ob++) {
800 opgb = uvm.page_free[fl].pgfl_buckets[ob];
801 for (oc = 0; oc < ocolors; oc++) {
802 ohead = &opgb->pgb_colors[oc];
803 while ((pg = LIST_FIRST(ohead)) != NULL) {
804 LIST_REMOVE(pg, pageq.list);
805 /*
806 * Here we decide on the NEW color &
807 * bucket for the page. For NUMA
808 * we'll use the info that the
809 * hardware gave us. Otherwise we
810 * just do a round-robin among the
811 * buckets.
812 */
813 KASSERT(
814 uvm_page_get_bucket(pg) == ob);
815 KASSERT(fl ==
816 uvm_page_get_freelist(pg));
817 if (uvm.numa_alloc) {
818 nb = uvm_page_numa_lookup(pg);
819 } else if (nb + 1 < newnbuckets) {
820 nb = nb + 1;
821 } else {
822 nb = 0;
823 }
824 uvm_page_set_bucket(pg, nb);
825 npgb = npgfl.pgfl_buckets[nb];
826 npgb->pgb_nfree++;
827 nc = VM_PGCOLOR(pg);
828 nhead = &npgb->pgb_colors[nc];
829 LIST_INSERT_HEAD(nhead, pg, pageq.list);
830 }
831 }
832 }
833 /* Install the new freelist. */
834 memcpy(&uvm.page_free[fl], &npgfl, sizeof(npgfl));
835 }
836
837 /* Unlock and free the old memory. */
838 oldbucketmemsize = recolored_pages_memsize;
839 oldbucketmem = recolored_pages_mem;
840 recolored_pages_memsize = bucketmemsize;
841 recolored_pages_mem = bucketmem;
842 uvm_pgfl_unlock();
843
844 if (oldbucketmemsize) {
845 kmem_free(oldbucketmem, oldbucketmemsize);
846 }
847
848 uvm_pgflcache_resume();
849
850 /*
851 * this calls uvm_km_alloc() which may want to hold
852 * uvm_freelist_lock.
853 */
854 uvm_pager_realloc_emerg();
855 }
856
857 /*
858 * uvm_page_recolor: Recolor the pages if the new color count is
859 * larger than the old one.
860 */
861
862 void
863 uvm_page_recolor(int newncolors)
864 {
865
866 uvm_page_redim(newncolors, uvm.bucketcount);
867 }
868
869 /*
870 * uvm_page_rebucket: Determine a bucket structure and redim the free
871 * lists to match.
872 */
873
874 void
875 uvm_page_rebucket(void)
876 {
877 u_int min_numa, max_numa, npackage, shift;
878 struct cpu_info *ci, *ci2, *ci3;
879 CPU_INFO_ITERATOR cii;
880
881 /*
882 * If we have more than one NUMA node, and the maximum NUMA node ID
883 * is less than PGFL_MAX_BUCKETS, then we'll use NUMA distribution
884 * for free pages. uvm_pagefree() will not reassign pages to a
885 * different bucket on free.
886 */
887 min_numa = (u_int)-1;
888 max_numa = 0;
889 for (CPU_INFO_FOREACH(cii, ci)) {
890 if (ci->ci_numa_id < min_numa) {
891 min_numa = ci->ci_numa_id;
892 }
893 if (ci->ci_numa_id > max_numa) {
894 max_numa = ci->ci_numa_id;
895 }
896 }
897 if (min_numa != max_numa && max_numa < PGFL_MAX_BUCKETS) {
898 #ifdef NUMA
899 /*
900 * We can do this, and it seems to work well, but until
901 * further experiments are done we'll stick with the cache
902 * locality strategy.
903 */
904 aprint_debug("UVM: using NUMA allocation scheme\n");
905 for (CPU_INFO_FOREACH(cii, ci)) {
906 ci->ci_data.cpu_uvm->pgflbucket = ci->ci_numa_id;
907 }
908 uvm.numa_alloc = true;
909 uvm_page_redim(uvmexp.ncolors, max_numa + 1);
910 return;
911 #endif
912 }
913
914 /*
915 * Otherwise we'll go with a scheme to maximise L2/L3 cache locality
916 * and minimise lock contention. Count the total number of CPU
917 * packages, and then try to distribute the buckets among CPU
918 * packages evenly. uvm_pagefree() will reassign pages to the
919 * freeing CPU's preferred bucket on free.
920 */
921 npackage = 0;
922 ci = curcpu();
923 ci2 = ci;
924 do {
925 npackage++;
926 ci2 = ci2->ci_sibling[CPUREL_PEER];
927 } while (ci2 != ci);
928
929 /*
930 * Figure out how to arrange the packages & buckets, and the total
931 * number of buckets we need. XXX 2 may not be the best factor.
932 */
933 for (shift = 0; npackage > PGFL_MAX_BUCKETS; shift++) {
934 npackage >>= 1;
935 }
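/*
 * For example, if PGFL_MAX_BUCKETS were 8: 12 packages halve once
 * (shift = 1) to 6 buckets with 2 packages sharing each; 4 packages
 * need no halving and map 1:1 onto 4 buckets.
 */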
936 uvm_page_redim(uvmexp.ncolors, npackage);
937
938 /*
939 * Now tell each CPU which bucket to use. In the outer loop, scroll
940 * through all CPU packages.
941 */
942 npackage = 0;
943 ci = curcpu();
944 ci2 = ci;
945 do {
946 /*
947 * In the inner loop, scroll through all CPUs in the package
948 * and assign the same bucket ID.
949 */
950 ci3 = ci2;
951 do {
952 ci3->ci_data.cpu_uvm->pgflbucket = npackage >> shift;
953 ci3 = ci3->ci_sibling[CPUREL_PACKAGE];
954 } while (ci3 != ci2);
955 npackage++;
956 ci2 = ci2->ci_sibling[CPUREL_PEER];
957 } while (ci2 != ci);
958
959 aprint_debug("UVM: using package allocation scheme, "
960 "%d package(s) per bucket\n", 1 << shift);
961 }
962
963 /*
964 * uvm_cpu_attach: initialize per-CPU data structures.
965 */
966
967 void
968 uvm_cpu_attach(struct cpu_info *ci)
969 {
970 struct uvm_cpu *ucpu;
971
972 /* Already done in uvm_page_init(). */
973 if (!CPU_IS_PRIMARY(ci)) {
974 /* Add more reserve pages for this CPU. */
975 uvmexp.reserve_kernel += vm_page_reserve_kernel;
976
977 /* Allocate per-CPU data structures. */
978 ucpu = kmem_zalloc(sizeof(struct uvm_cpu) + coherency_unit - 1,
979 KM_SLEEP);
980 ucpu = (struct uvm_cpu *)roundup2((uintptr_t)ucpu,
981 coherency_unit);
982 uvm.cpus[cpu_index(ci)] = ucpu;
983 ci->ci_data.cpu_uvm = ucpu;
984 }
985
986 /*
987 * Attach RNG source for this CPU's VM events
988 */
989 rnd_attach_source(&uvm.cpus[cpu_index(ci)]->rs,
990 ci->ci_data.cpu_name, RND_TYPE_VM,
991 RND_FLAG_COLLECT_TIME|RND_FLAG_COLLECT_VALUE|
992 RND_FLAG_ESTIMATE_VALUE);
993 }
994
995 /*
996 * uvm_free: fetch the total amount of free memory in pages. This can have a
997 * detrimental effect on performance due to false sharing; don't call unless
998 * needed.
999 */
1000
1001 int
1002 uvm_free(void)
1003 {
1004 struct pgfreelist *pgfl;
1005 int fl, b, fpages;
1006
1007 fpages = 0;
1008 for (fl = 0; fl < VM_NFREELIST; fl++) {
1009 pgfl = &uvm.page_free[fl];
1010 for (b = 0; b < uvm.bucketcount; b++) {
1011 fpages += pgfl->pgfl_buckets[b]->pgb_nfree;
1012 }
1013 }
1014 return fpages;
1015 }
1016
1017 /*
1018 * uvm_pagealloc_pgb: helper routine that tries to allocate any color from a
1019 * specific freelist and specific bucket only.
1020 *
1021 * => must be at IPL_VM or higher to protect per-CPU data structures.
1022 */
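/*
 * The reserve check in this function is made against the individual
 * bucket: e.g. with uvmexp.reserve_kernel at 40 and reserve_pagedaemon
 * at 1, a bucket down to 30 free pages only serves UVM_PGA_USERESERVE
 * callers, and a bucket down to its last page serves only the
 * pagedaemon itself.
 */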
1023
1024 static struct vm_page *
1025 uvm_pagealloc_pgb(struct uvm_cpu *ucpu, int f, int b, int *trycolorp, int flags)
1026 {
1027 int c, trycolor, colormask;
1028 struct pgflbucket *pgb;
1029 struct vm_page *pg;
1030 kmutex_t *lock;
1031
1032 /*
1033 * Skip the bucket if empty, no lock needed. There could be many
1034 * empty freelists/buckets.
1035 */
1036 pgb = uvm.page_free[f].pgfl_buckets[b];
1037 if (pgb->pgb_nfree == 0) {
1038 return NULL;
1039 }
1040
1041 /* Skip bucket if low on memory. */
1042 lock = &uvm_freelist_locks[b].lock;
1043 mutex_spin_enter(lock);
1044 if (__predict_false(pgb->pgb_nfree <= uvmexp.reserve_kernel)) {
1045 if ((flags & UVM_PGA_USERESERVE) == 0 ||
1046 (pgb->pgb_nfree <= uvmexp.reserve_pagedaemon &&
1047 curlwp != uvm.pagedaemon_lwp)) {
1048 mutex_spin_exit(lock);
1049 return NULL;
1050 }
1051 }
1052
1053 /* Try all page colors as needed. */
1054 c = trycolor = *trycolorp;
1055 colormask = uvmexp.colormask;
1056 do {
1057 pg = LIST_FIRST(&pgb->pgb_colors[c]);
1058 if (__predict_true(pg != NULL)) {
1059 /*
1060 * Got a free page! PG_FREE must be cleared under
1061 * lock because of uvm_pglistalloc().
1062 */
1063 LIST_REMOVE(pg, pageq.list);
1064 KASSERT(pg->flags & PG_FREE);
1065 pg->flags &= PG_ZERO;
1066 pgb->pgb_nfree--;
1067
1068 /*
1069 * While we have the bucket locked and our data
1070 * structures fresh in L1 cache, we have an ideal
1071 * opportunity to grab some pages for the freelist
1072 * cache without causing extra contention. Only do
1073 * so if we found pages in this CPU's preferred
1074 * bucket.
1075 */
1076 if (__predict_true(b == ucpu->pgflbucket)) {
1077 uvm_pgflcache_fill(ucpu, f, b, c);
1078 }
1079 mutex_spin_exit(lock);
1080 KASSERT(uvm_page_get_bucket(pg) == b);
1081 CPU_COUNT(c == trycolor ?
1082 CPU_COUNT_COLORHIT : CPU_COUNT_COLORMISS, 1);
1083 CPU_COUNT(CPU_COUNT_CPUMISS, 1);
1084 *trycolorp = c;
1085 return pg;
1086 }
1087 c = (c + 1) & colormask;
1088 } while (c != trycolor);
1089 mutex_spin_exit(lock);
1090
1091 return NULL;
1092 }
1093
1094 /*
1095 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat that allocates
1096 * any color from any bucket, in a specific freelist.
1097 *
1098 * => must be at IPL_VM or higher to protect per-CPU data structures.
1099 */
1100
1101 static struct vm_page *
1102 uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int f, int *trycolorp, int flags)
1103 {
1104 int b, trybucket, bucketcount;
1105 struct vm_page *pg;
1106
1107 /* Try for the exact thing in the per-CPU cache. */
1108 if ((pg = uvm_pgflcache_alloc(ucpu, f, *trycolorp)) != NULL) {
1109 CPU_COUNT(CPU_COUNT_CPUHIT, 1);
1110 CPU_COUNT(CPU_COUNT_COLORHIT, 1);
1111 return pg;
1112 }
1113
1114 /* Walk through all buckets, trying our preferred bucket first. */
1115 trybucket = ucpu->pgflbucket;
1116 b = trybucket;
1117 bucketcount = uvm.bucketcount;
1118 do {
1119 pg = uvm_pagealloc_pgb(ucpu, f, b, trycolorp, flags);
1120 if (pg != NULL) {
1121 return pg;
1122 }
1123 b = (b + 1 == bucketcount ? 0 : b + 1);
1124 } while (b != trybucket);
1125
1126 return NULL;
1127 }
1128
1129 /*
1130 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
1131 *
1132 * => return null if no pages free
1133 * => wake up pagedaemon if number of free pages drops below low water mark
1134 * => if obj != NULL, obj must be locked (to put in obj's tree)
1135 * => if anon != NULL, anon must be locked (to put in anon)
1136 * => only one of obj or anon can be non-null
1137 * => caller must activate/deactivate page if it is not wired.
1138 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
1139 * => policy decision: it is more important to pull a page off of the
1140 * appropriate priority free list than it is to get a zero'd or
1141 * unknown contents page. This is because we live with the
1142 * consequences of a bad free list decision for the entire
1143 * lifetime of the page, e.g. if the page comes from memory that
1144 * is slower to access.
1145 */
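/*
 * Most callers reach this through the uvm_pagealloc() wrapper in
 * uvm_page.h, which passes UVM_PGA_STRAT_NORMAL and free list 0.  A
 * typical object-backed allocation is along the lines of
 *
 *	pg = uvm_pagealloc(uobj, off, NULL, UVM_PGA_ZERO);
 *	if (pg == NULL) {
 *		(unlock uobj, uvm_wait(...), relock, retry)
 *	}
 *
 * with uobj locked across the call, as required above.
 */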
1146
1147 struct vm_page *
1148 uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
1149 int flags, int strat, int free_list)
1150 {
1151 int zeroit = 0, color;
1152 int lcv, error, s;
1153 struct uvm_cpu *ucpu;
1154 struct vm_page *pg;
1155 lwp_t *l;
1156
1157 KASSERT(obj == NULL || anon == NULL);
1158 KASSERT(anon == NULL || (flags & UVM_FLAG_COLORMATCH) || off == 0);
1159 KASSERT(off == trunc_page(off));
1160 KASSERT(obj == NULL || mutex_owned(obj->vmobjlock));
1161 KASSERT(anon == NULL || anon->an_lock == NULL ||
1162 mutex_owned(anon->an_lock));
1163
1164 /*
1165 * This implements a global round-robin page coloring
1166 * algorithm.
1167 */
1168
1169 s = splvm();
1170 ucpu = curcpu()->ci_data.cpu_uvm;
1171 if (flags & UVM_FLAG_COLORMATCH) {
1172 color = atop(off) & uvmexp.colormask;
1173 } else {
1174 color = ucpu->pgflcolor;
1175 }
1176
1177 /*
1178 * fail if any of these conditions is true:
1179 * [1] there really are no free pages, or
1180 * [2] only kernel "reserved" pages remain and
1181 * reserved pages have not been requested.
1182 * [3] only pagedaemon "reserved" pages remain and
1183 * the requestor isn't the pagedaemon.
1184 * we make kernel reserve pages available if called by a
1185 * kernel thread or a realtime thread.
1186 */
1187 l = curlwp;
1188 if (__predict_true(l != NULL) && lwp_eprio(l) >= PRI_KTHREAD) {
1189 flags |= UVM_PGA_USERESERVE;
1190 }
1191
1192 /* If the allocator's running in NUMA mode, go with NUMA strategy. */
1193 if (uvm.numa_alloc && strat == UVM_PGA_STRAT_NORMAL) {
1194 strat = UVM_PGA_STRAT_NUMA;
1195 }
1196
1197 again:
1198 switch (strat) {
1199 case UVM_PGA_STRAT_NORMAL:
1200 /* Check freelists: descending priority (ascending id) order. */
1201 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
1202 pg = uvm_pagealloc_pgfl(ucpu, lcv, &color, flags);
1203 if (pg != NULL) {
1204 goto gotit;
1205 }
1206 }
1207
1208 /* No pages free! Have pagedaemon free some memory. */
1209 splx(s);
1210 uvm_kick_pdaemon();
1211 return NULL;
1212
1213 case UVM_PGA_STRAT_ONLY:
1214 case UVM_PGA_STRAT_FALLBACK:
1215 /* Attempt to allocate from the specified free list. */
1216 KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
1217 pg = uvm_pagealloc_pgfl(ucpu, free_list, &color, flags);
1218 if (pg != NULL) {
1219 goto gotit;
1220 }
1221
1222 /* Fall back, if possible. */
1223 if (strat == UVM_PGA_STRAT_FALLBACK) {
1224 strat = UVM_PGA_STRAT_NORMAL;
1225 goto again;
1226 }
1227
1228 /* No pages free! Have pagedaemon free some memory. */
1229 splx(s);
1230 uvm_kick_pdaemon();
1231 return NULL;
1232
1233 case UVM_PGA_STRAT_NUMA:
1234 /*
1235 * NUMA strategy: allocating from the correct bucket is more
1236 * important than observing freelist priority. Look only to
1237 * the current NUMA node; if that fails, we need to look to
1238 * other NUMA nodes, so retry with the normal strategy.
1239 */
1240 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
1241 pg = uvm_pgflcache_alloc(ucpu, lcv, color);
1242 if (pg != NULL) {
1243 CPU_COUNT(CPU_COUNT_CPUHIT, 1);
1244 CPU_COUNT(CPU_COUNT_COLORHIT, 1);
1245 goto gotit;
1246 }
1247 pg = uvm_pagealloc_pgb(ucpu, lcv,
1248 ucpu->pgflbucket, &color, flags);
1249 if (pg != NULL) {
1250 goto gotit;
1251 }
1252 }
1253 strat = UVM_PGA_STRAT_NORMAL;
1254 goto again;
1255
1256 default:
1257 panic("uvm_pagealloc_strat: bad strat %d", strat);
1258 /* NOTREACHED */
1259 }
1260
1261 gotit:
1262 /*
1263 * We now know which color we actually allocated from; set
1264 * the next color accordingly.
1265 */
1266
1267 ucpu->pgflcolor = (color + 1) & uvmexp.colormask;
1268
1269 /*
1270 * while still at IPL_VM, update allocation statistics and remember
1271 * if we have to zero the page
1272 */
1273
1274 if (flags & UVM_PGA_ZERO) {
1275 if (pg->flags & PG_ZERO) {
1276 CPU_COUNT(CPU_COUNT_PGA_ZEROHIT, 1);
1277 zeroit = 0;
1278 } else {
1279 CPU_COUNT(CPU_COUNT_PGA_ZEROMISS, 1);
1280 zeroit = 1;
1281 }
1282 }
1283 if (pg->flags & PG_ZERO) {
1284 CPU_COUNT(CPU_COUNT_ZEROPAGES, -1);
1285 }
1286 if (anon) {
1287 CPU_COUNT(CPU_COUNT_ANONPAGES, 1);
1288 }
1289 splx(s);
1290 KASSERT((pg->flags & ~(PG_ZERO|PG_FREE)) == 0);
1291
1292 /*
1293 * assign the page to the object. as the page was free, we know
1294 * that pg->uobject and pg->uanon are NULL. we only need to take
1295 * the page's interlock if we are changing the values.
1296 */
1297 if (anon != NULL || obj != NULL) {
1298 mutex_enter(&pg->interlock);
1299 }
1300 pg->offset = off;
1301 pg->uobject = obj;
1302 pg->uanon = anon;
1303 KASSERT(uvm_page_locked_p(pg));
1304 pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
1305 if (anon) {
1306 anon->an_page = pg;
1307 pg->flags |= PG_ANON;
1308 mutex_exit(&pg->interlock);
1309 } else if (obj) {
1310 uvm_pageinsert_object(obj, pg);
1311 mutex_exit(&pg->interlock);
1312 error = uvm_pageinsert_tree(obj, pg);
1313 if (error != 0) {
1314 mutex_enter(&pg->interlock);
1315 uvm_pageremove_object(obj, pg);
1316 mutex_exit(&pg->interlock);
1317 uvm_pagefree(pg);
1318 return NULL;
1319 }
1320 }
1321
1322 #if defined(UVM_PAGE_TRKOWN)
1323 pg->owner_tag = NULL;
1324 #endif
1325 UVM_PAGE_OWN(pg, "new alloc");
1326
1327 if (flags & UVM_PGA_ZERO) {
1328 /*
1329 * A zero'd page is not clean. If we got a page not already
1330 * zero'd, then we have to zero it ourselves.
1331 */
1332 pg->flags &= ~PG_CLEAN;
1333 if (zeroit)
1334 pmap_zero_page(VM_PAGE_TO_PHYS(pg));
1335 }
1336
1337 return(pg);
1338 }
1339
1340 /*
1341 * uvm_pagereplace: replace a page with another
1342 *
1343 * => object must be locked
1344 */
1345
1346 void
1347 uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg)
1348 {
1349 struct uvm_object *uobj = oldpg->uobject;
1350
1351 KASSERT((oldpg->flags & PG_TABLED) != 0);
1352 KASSERT(uobj != NULL);
1353 KASSERT((newpg->flags & PG_TABLED) == 0);
1354 KASSERT(newpg->uobject == NULL);
1355 KASSERT(mutex_owned(uobj->vmobjlock));
1356
1357 newpg->offset = oldpg->offset;
1358 uvm_pageremove_tree(uobj, oldpg);
1359 uvm_pageinsert_tree(uobj, newpg);
1360
1361 /* take page interlocks during rename */
1362 if (oldpg < newpg) {
1363 mutex_enter(&oldpg->interlock);
1364 mutex_enter(&newpg->interlock);
1365 } else {
1366 mutex_enter(&newpg->interlock);
1367 mutex_enter(&oldpg->interlock);
1368 }
1369 newpg->uobject = uobj;
1370 uvm_pageinsert_object(uobj, newpg);
1371 uvm_pageremove_object(uobj, oldpg);
1372 mutex_exit(&oldpg->interlock);
1373 mutex_exit(&newpg->interlock);
1374 }
1375
1376 /*
1377 * uvm_pagerealloc: reallocate a page from one object to another
1378 *
1379 * => both objects must be locked
1380 * => both interlocks must be held
1381 */
1382
1383 void
1384 uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
1385 {
1386 /*
1387 * remove it from the old object
1388 */
1389
1390 if (pg->uobject) {
1391 uvm_pageremove_tree(pg->uobject, pg);
1392 uvm_pageremove_object(pg->uobject, pg);
1393 }
1394
1395 /*
1396 * put it in the new object
1397 */
1398
1399 if (newobj) {
1400 /*
1401 * XXX we have no in-tree users of this functionality
1402 */
1403 panic("uvm_pagerealloc: no impl");
1404 }
1405 }
1406
1407 #ifdef DEBUG
1408 /*
1409 * check if page is zero-filled
1410 */
1411 void
1412 uvm_pagezerocheck(struct vm_page *pg)
1413 {
1414 int *p, *ep;
1415
1416 KASSERT(uvm_zerocheckkva != 0);
1417
1418 /*
1419 * XXX assuming pmap_kenter_pa and pmap_kremove never call
1420 * uvm page allocator.
1421 *
1422 * it might be better to have a "CPU-local temporary map" pmap interface.
1423 */
1424 pmap_kenter_pa(uvm_zerocheckkva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ, 0);
1425 p = (int *)uvm_zerocheckkva;
1426 ep = (int *)((char *)p + PAGE_SIZE);
1427 pmap_update(pmap_kernel());
1428 while (p < ep) {
1429 if (*p != 0)
1430 panic("PG_ZERO page isn't zero-filled");
1431 p++;
1432 }
1433 pmap_kremove(uvm_zerocheckkva, PAGE_SIZE);
1434 /*
1435 * pmap_update() is not necessary here because no one except us
1436 * uses this VA.
1437 */
1438 }
1439 #endif /* DEBUG */
1440
1441 /*
1442 * uvm_pagefree: free page
1443 *
1444 * => erase page's identity (i.e. remove from object)
1445 * => put page on free list
1446 * => caller must lock owning object (either anon or uvm_object)
1447 * => assumes all valid mappings of pg are gone
1448 */
1449
1450 void
1451 uvm_pagefree(struct vm_page *pg)
1452 {
1453 struct pgfreelist *pgfl;
1454 struct pgflbucket *pgb;
1455 struct uvm_cpu *ucpu;
1456 kmutex_t *lock;
1457 int bucket, s;
1458 bool locked;
1459
1460 #ifdef DEBUG
1461 if (pg->uobject == (void *)0xdeadbeef &&
1462 pg->uanon == (void *)0xdeadbeef) {
1463 panic("uvm_pagefree: freeing free page %p", pg);
1464 }
1465 #endif /* DEBUG */
1466
1467 KASSERT((pg->flags & PG_PAGEOUT) == 0);
1468 KASSERT(!(pg->flags & PG_FREE));
1469 KASSERT(pg->uobject == NULL || mutex_owned(pg->uobject->vmobjlock));
1470 KASSERT(pg->uobject != NULL || pg->uanon == NULL ||
1471 mutex_owned(pg->uanon->an_lock));
1472
1473 /*
1474 * remove the page from the object's tree before acquiring any page
1475 * interlocks: this can acquire locks to free radixtree nodes.
1476 */
1477 if (pg->uobject != NULL) {
1478 uvm_pageremove_tree(pg->uobject, pg);
1479 }
1480
1481 /*
1482 * if the page is loaned, resolve the loan instead of freeing.
1483 */
1484
1485 if (pg->loan_count) {
1486 KASSERT(pg->wire_count == 0);
1487
1488 /*
1489 * if the page is owned by an anon then we just want to
1490 * drop anon ownership. the kernel will free the page when
1491 * it is done with it. if the page is owned by an object,
1492 * remove it from the object and mark it dirty for the benefit
1493 * of possible anon owners.
1494 *
1495 * regardless of previous ownership, wakeup any waiters,
1496 * unbusy the page, and we're done.
1497 */
1498
1499 mutex_enter(&pg->interlock);
1500 locked = true;
1501 if (pg->uobject != NULL) {
1502 uvm_pageremove_object(pg->uobject, pg);
1503 pg->flags &= ~PG_CLEAN;
1504 } else if (pg->uanon != NULL) {
1505 if ((pg->flags & PG_ANON) == 0) {
1506 pg->loan_count--;
1507 } else {
1508 pg->flags &= ~PG_ANON;
1509 cpu_count(CPU_COUNT_ANONPAGES, -1);
1510 }
1511 pg->uanon->an_page = NULL;
1512 pg->uanon = NULL;
1513 }
1514 if (pg->flags & PG_WANTED) {
1515 wakeup(pg);
1516 }
1517 pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1);
1518 #ifdef UVM_PAGE_TRKOWN
1519 pg->owner_tag = NULL;
1520 #endif
1521 if (pg->loan_count) {
1522 KASSERT(pg->uobject == NULL);
1523 mutex_exit(&pg->interlock);
1524 if (pg->uanon == NULL) {
1525 uvm_pagedequeue(pg);
1526 }
1527 return;
1528 }
1529 } else if (pg->uobject != NULL || pg->uanon != NULL ||
1530 pg->wire_count != 0) {
1531 mutex_enter(&pg->interlock);
1532 locked = true;
1533 } else {
1534 locked = false;
1535 }
1536
1537 /*
1538 * remove page from its object or anon.
1539 */
1540 if (pg->uobject != NULL) {
1541 uvm_pageremove_object(pg->uobject, pg);
1542 } else if (pg->uanon != NULL) {
1543 pg->uanon->an_page = NULL;
1544 pg->uanon = NULL;
1545 cpu_count(CPU_COUNT_ANONPAGES, -1);
1546 }
1547
1548 /*
1549 * if the page was wired, unwire it now.
1550 */
1551
1552 if (pg->wire_count) {
1553 pg->wire_count = 0;
1554 atomic_dec_uint(&uvmexp.wired);
1555 }
1556 if (locked) {
1557 mutex_exit(&pg->interlock);
1558 }
1559
1560 /*
1561 * now remove the page from the queues.
1562 */
1563 uvm_pagedequeue(pg);
1564
1565 /*
1566 * and put on free queue
1567 */
1568
1569 #ifdef DEBUG
1570 pg->uobject = (void *)0xdeadbeef;
1571 pg->uanon = (void *)0xdeadbeef;
1572 if (pg->flags & PG_ZERO)
1573 uvm_pagezerocheck(pg);
1574 #endif /* DEBUG */
1575
1576 s = splvm();
1577 ucpu = curcpu()->ci_data.cpu_uvm;
1578
1579 /*
1580 * If we're using the NUMA strategy, we'll only cache this page if
1581 * it came from the local CPU's NUMA node. Otherwise we're using
1582 * the L2/L3 cache locality strategy and we'll cache anything.
1583 */
1584 if (uvm.numa_alloc) {
1585 bucket = uvm_page_get_bucket(pg);
1586 } else {
1587 bucket = ucpu->pgflbucket;
1588 uvm_page_set_bucket(pg, bucket);
1589 }
1590
1591 /* Try to send the page to the per-CPU cache. */
1592 if (bucket == ucpu->pgflbucket && uvm_pgflcache_free(ucpu, pg)) {
1593 splx(s);
1594 return;
1595 }
1596
1597 /* Didn't work. Never mind, send it to a global bucket. */
1598 pgfl = &uvm.page_free[uvm_page_get_freelist(pg)];
1599 pgb = pgfl->pgfl_buckets[bucket];
1600 lock = &uvm_freelist_locks[bucket].lock;
1601
1602 mutex_spin_enter(lock);
1603 /* PG_FREE must be set under lock because of uvm_pglistalloc(). */
1604 pg->flags = (pg->flags & PG_ZERO) | PG_FREE;
1605 LIST_INSERT_HEAD(&pgb->pgb_colors[VM_PGCOLOR(pg)], pg, pageq.list);
1606 pgb->pgb_nfree++;
1607 mutex_spin_exit(lock);
1608 splx(s);
1609 }
1610
1611 /*
1612 * uvm_page_unbusy: unbusy an array of pages.
1613 *
1614 * => pages must either all belong to the same object, or all belong to anons.
1615 * => if pages are object-owned, object must be locked.
1616 * => if pages are anon-owned, anons must be locked.
1617 * => caller must make sure that anon-owned pages are not PG_RELEASED.
1618 */
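/*
 * Typical use is a pager finishing I/O on a cluster of pages it had
 * marked PG_BUSY, roughly
 *
 *	mutex_enter(uobj->vmobjlock);
 *	uvm_page_unbusy(pgs, npages);
 *	mutex_exit(uobj->vmobjlock);
 *
 * with every page in pgs[] belonging to uobj.
 */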
1619
1620 void
1621 uvm_page_unbusy(struct vm_page **pgs, int npgs)
1622 {
1623 struct vm_page *pg;
1624 int i;
1625 UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);
1626
1627 for (i = 0; i < npgs; i++) {
1628 pg = pgs[i];
1629 if (pg == NULL || pg == PGO_DONTCARE) {
1630 continue;
1631 }
1632
1633 KASSERT(uvm_page_locked_p(pg));
1634 KASSERT(pg->flags & PG_BUSY);
1635 KASSERT((pg->flags & PG_PAGEOUT) == 0);
1636 if (pg->flags & PG_WANTED) {
1637 /* XXXAD thundering herd problem. */
1638 wakeup(pg);
1639 }
1640 if (pg->flags & PG_RELEASED) {
1641 UVMHIST_LOG(ubchist, "releasing pg %#jx",
1642 (uintptr_t)pg, 0, 0, 0);
1643 KASSERT(pg->uobject != NULL ||
1644 (pg->uanon != NULL && pg->uanon->an_ref > 0));
1645 pg->flags &= ~PG_RELEASED;
1646 uvm_pagefree(pg);
1647 } else {
1648 UVMHIST_LOG(ubchist, "unbusying pg %#jx",
1649 (uintptr_t)pg, 0, 0, 0);
1650 KASSERT((pg->flags & PG_FAKE) == 0);
1651 pg->flags &= ~(PG_WANTED|PG_BUSY);
1652 UVM_PAGE_OWN(pg, NULL);
1653 }
1654 }
1655 }
1656
1657 #if defined(UVM_PAGE_TRKOWN)
1658 /*
1659 * uvm_page_own: set or release page ownership
1660 *
1661 * => this is a debugging function that keeps track of who sets PG_BUSY
1662 * and where they do it. it can be used to track down problems
1663 * such as a process setting "PG_BUSY" and never releasing it.
1664 * => page's object [if any] must be locked
1665 * => if "tag" is NULL then we are releasing page ownership
1666 */
1667 void
1668 uvm_page_own(struct vm_page *pg, const char *tag)
1669 {
1670
1671 KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);
1672 KASSERT((pg->flags & PG_WANTED) == 0);
1673 KASSERT(uvm_page_locked_p(pg));
1674
1675 /* gain ownership? */
1676 if (tag) {
1677 KASSERT((pg->flags & PG_BUSY) != 0);
1678 if (pg->owner_tag) {
1679 printf("uvm_page_own: page %p already owned "
1680 "by proc %d [%s]\n", pg,
1681 pg->owner, pg->owner_tag);
1682 panic("uvm_page_own");
1683 }
1684 pg->owner = curproc->p_pid;
1685 pg->lowner = curlwp->l_lid;
1686 pg->owner_tag = tag;
1687 return;
1688 }
1689
1690 /* drop ownership */
1691 KASSERT((pg->flags & PG_BUSY) == 0);
1692 if (pg->owner_tag == NULL) {
1693 printf("uvm_page_own: dropping ownership of a non-owned "
1694 "page (%p)\n", pg);
1695 panic("uvm_page_own");
1696 }
1697 pg->owner_tag = NULL;
1698 }
1699 #endif
1700
1701 /*
1702 * uvm_pageidlezero: zero free pages while the system is idle.
1703 */
1704 void
1705 uvm_pageidlezero(void)
1706 {
1707
1708 /*
1709 * Disabled for the moment. Previous strategy too cache heavy. In
1710 * the future we may experiment with zeroing the pages held in the
1711 * per-CPU cache (uvm_pgflcache).
1712 */
1713 }
1714
1715 /*
1716 * uvm_pagelookup: look up a page
1717 *
1718 * => caller should lock object to keep someone from pulling the page
1719 * out from under it
1720 */
1721
1722 struct vm_page *
1723 uvm_pagelookup(struct uvm_object *obj, voff_t off)
1724 {
1725 struct vm_page *pg;
1726
1727 /* No - used from DDB. KASSERT(mutex_owned(obj->vmobjlock)); */
1728
1729 pg = radix_tree_lookup_node(&obj->uo_pages, off >> PAGE_SHIFT);
1730
1731 KASSERT(pg == NULL || obj->uo_npages != 0);
1732 KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
1733 (pg->flags & PG_BUSY) != 0);
1734 return pg;
1735 }
1736
1737 /*
1738 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
1739 *
1740 * => caller must lock objects
1741 */
1742
1743 void
1744 uvm_pagewire(struct vm_page *pg)
1745 {
1746
1747 KASSERT(uvm_page_locked_p(pg));
1748 #if defined(READAHEAD_STATS)
1749 if ((pg->flags & PG_READAHEAD) != 0) {
1750 uvm_ra_hit.ev_count++;
1751 pg->flags &= ~PG_READAHEAD;
1752 }
1753 #endif /* defined(READAHEAD_STATS) */
1754 if (pg->wire_count == 0) {
1755 uvm_pagedequeue(pg);
1756 atomic_inc_uint(&uvmexp.wired);
1757 }
1758 mutex_enter(&pg->interlock);
1759 pg->wire_count++;
1760 mutex_exit(&pg->interlock);
1761 KASSERT(pg->wire_count > 0); /* detect wraparound */
1762 }
1763
1764 /*
1765 * uvm_pageunwire: unwire the page.
1766 *
1767 * => activate if wire count goes to zero.
1768 * => caller must lock objects
1769 */
1770
1771 void
1772 uvm_pageunwire(struct vm_page *pg)
1773 {
1774
1775 KASSERT(uvm_page_locked_p(pg));
1776 KASSERT(pg->wire_count != 0);
1777 KASSERT(!uvmpdpol_pageisqueued_p(pg));
1778 mutex_enter(&pg->interlock);
1779 pg->wire_count--;
1780 mutex_exit(&pg->interlock);
1781 if (pg->wire_count == 0) {
1782 uvm_pageactivate(pg);
1783 KASSERT(uvmexp.wired != 0);
1784 atomic_dec_uint(&uvmexp.wired);
1785 }
1786 }
1787
1788 /*
1789 * uvm_pagedeactivate: deactivate page
1790 *
1791 * => caller must lock objects
1792 * => caller must check to make sure page is not wired
1793 * => object that page belongs to must be locked (so we can adjust pg->flags)
1794 * => caller must clear the reference on the page before calling
1795 */
1796
1797 void
1798 uvm_pagedeactivate(struct vm_page *pg)
1799 {
1800
1801 KASSERT(uvm_page_locked_p(pg));
1802 if (pg->wire_count == 0) {
1803 KASSERT(uvmpdpol_pageisqueued_p(pg));
1804 uvmpdpol_pagedeactivate(pg);
1805 }
1806 }
1807
1808 /*
1809 * uvm_pageactivate: activate page
1810 *
1811 * => caller must lock objects
1812 */
1813
1814 void
1815 uvm_pageactivate(struct vm_page *pg)
1816 {
1817
1818 KASSERT(uvm_page_locked_p(pg));
1819 #if defined(READAHEAD_STATS)
1820 if ((pg->flags & PG_READAHEAD) != 0) {
1821 uvm_ra_hit.ev_count++;
1822 pg->flags &= ~PG_READAHEAD;
1823 }
1824 #endif /* defined(READAHEAD_STATS) */
1825 if (pg->wire_count == 0) {
1826 uvmpdpol_pageactivate(pg);
1827 }
1828 }
1829
1830 /*
1831 * uvm_pagedequeue: remove a page from any paging queue
1832 *
1833 * => caller must lock objects
1834 */
1835 void
1836 uvm_pagedequeue(struct vm_page *pg)
1837 {
1838
1839 KASSERT(uvm_page_locked_p(pg));
1840 if (uvmpdpol_pageisqueued_p(pg)) {
1841 uvmpdpol_pagedequeue(pg);
1842 }
1843 }
1844
1845 /*
1846 * uvm_pageenqueue: add a page to a paging queue without activating.
1847 * used where a page is not really demanded (yet). e.g. read-ahead
1848 *
1849 * => caller must lock objects
1850 */
1851 void
1852 uvm_pageenqueue(struct vm_page *pg)
1853 {
1854
1855 KASSERT(uvm_page_locked_p(pg));
1856 if (pg->wire_count == 0 && !uvmpdpol_pageisqueued_p(pg)) {
1857 uvmpdpol_pageenqueue(pg);
1858 }
1859 }
1860
1861 /*
1862 * uvm_pagezero: zero fill a page
1863 *
1864 * => if page is part of an object then the object should be locked
1865 * to protect pg->flags.
1866 */
1867
1868 void
1869 uvm_pagezero(struct vm_page *pg)
1870 {
1871 pg->flags &= ~PG_CLEAN;
1872 pmap_zero_page(VM_PAGE_TO_PHYS(pg));
1873 }
1874
1875 /*
1876 * uvm_pagecopy: copy a page
1877 *
1878 * => if page is part of an object then the object should be locked
1879 * to protect pg->flags.
1880 */
1881
1882 void
1883 uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
1884 {
1885
1886 dst->flags &= ~PG_CLEAN;
1887 pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
1888 }
1889
1890 /*
1891 * uvm_pageismanaged: test to see whether a page (specified by PA) is managed.
1892 */
1893
1894 bool
1895 uvm_pageismanaged(paddr_t pa)
1896 {
1897
1898 return (uvm_physseg_find(atop(pa), NULL) != UVM_PHYSSEG_TYPE_INVALID);
1899 }
1900
1901 /*
1902 * uvm_page_lookup_freelist: look up the free list for the specified page
1903 */
1904
1905 int
1906 uvm_page_lookup_freelist(struct vm_page *pg)
1907 {
1908 uvm_physseg_t upm;
1909
1910 upm = uvm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
1911 KASSERT(upm != UVM_PHYSSEG_TYPE_INVALID);
1912 return uvm_physseg_get_free_list(upm);
1913 }
1914
1915 /*
1916 * uvm_page_locked_p: return true if object associated with page is
1917 * locked. this is a weak check for runtime assertions only.
1918 */
1919
1920 bool
1921 uvm_page_locked_p(struct vm_page *pg)
1922 {
1923
1924 if (pg->uobject != NULL) {
1925 return mutex_owned(pg->uobject->vmobjlock);
1926 }
1927 if (pg->uanon != NULL) {
1928 return mutex_owned(pg->uanon->an_lock);
1929 }
1930 return true;
1931 }
1932
1933 #ifdef PMAP_DIRECT
1934 /*
1935 * Call pmap to translate a physical address into a virtual one and to run a
1936 * callback on it. Used to avoid actually mapping the pages; the pmap most
1937 * likely uses a direct map or equivalent.
1938 */
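/*
 * A sketch of a caller, assuming a hypothetical helper csum_cb() with the
 * matching (void *buf, size_t len, void *arg) shape that accumulates a
 * checksum into *arg:
 *
 *	uint32_t sum = 0;
 *	error = uvm_direct_process(pgs, npages, off, len, csum_cb, &sum);
 *
 * Only the byte range [off, off + len) within the (already allocated)
 * pages is handed to the callback.
 */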
1939 int
1940 uvm_direct_process(struct vm_page **pgs, u_int npages, voff_t off, vsize_t len,
1941 int (*process)(void *, size_t, void *), void *arg)
1942 {
1943 int error = 0;
1944 paddr_t pa;
1945 size_t todo;
1946 voff_t pgoff = (off & PAGE_MASK);
1947 struct vm_page *pg;
1948
1949 KASSERT(npages > 0 && len > 0);
1950
1951 for (int i = 0; i < npages; i++) {
1952 pg = pgs[i];
1953
1954 KASSERT(len > 0);
1955
1956 /*
1957 * Caller is responsible for ensuring all the pages are
1958 * available.
1959 */
1960 KASSERT(pg != NULL && pg != PGO_DONTCARE);
1961
1962 pa = VM_PAGE_TO_PHYS(pg);
1963 todo = MIN(len, PAGE_SIZE - pgoff);
1964
1965 error = pmap_direct_process(pa, pgoff, todo, process, arg);
1966 if (error)
1967 break;
1968
1969 pgoff = 0;
1970 len -= todo;
1971 }
1972
1973 KASSERTMSG(error != 0 || len == 0, "len %lu != 0 for non-error", len);
1974 return error;
1975 }
1976 #endif /* PMAP_DIRECT */
1977
1978 #if defined(DDB) || defined(DEBUGPRINT)
1979
1980 /*
1981 * uvm_page_printit: actually print the page
1982 */
1983
1984 static const char page_flagbits[] = UVM_PGFLAGBITS;
1985
1986 void
1987 uvm_page_printit(struct vm_page *pg, bool full,
1988 void (*pr)(const char *, ...))
1989 {
1990 struct vm_page *tpg;
1991 struct uvm_object *uobj;
1992 struct pgflbucket *pgb;
1993 struct pgflist *pgl;
1994 char pgbuf[128];
1995
1996 (*pr)("PAGE %p:\n", pg);
1997 snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags);
1998 (*pr)(" flags=%s, pqflags=%x, wire_count=%d, pa=0x%lx\n",
1999 pgbuf, pg->pqflags, pg->wire_count, (long)VM_PAGE_TO_PHYS(pg));
2000 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n",
2001 pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count);
2002 (*pr)(" bucket=%d freelist=%d\n",
2003 uvm_page_get_bucket(pg), uvm_page_get_freelist(pg));
2004 #if defined(UVM_PAGE_TRKOWN)
2005 if (pg->flags & PG_BUSY)
2006 (*pr)(" owning process = %d, tag=%s\n",
2007 pg->owner, pg->owner_tag);
2008 else
2009 (*pr)(" page not busy, no owner\n");
2010 #else
2011 (*pr)(" [page ownership tracking disabled]\n");
2012 #endif
2013
2014 if (!full)
2015 return;
2016
2017 /* cross-verify object/anon */
2018 if ((pg->flags & PG_FREE) == 0) {
2019 if (pg->flags & PG_ANON) {
2020 if (pg->uanon == NULL || pg->uanon->an_page != pg)
2021 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n",
2022 (pg->uanon) ? pg->uanon->an_page : NULL);
2023 else
2024 (*pr)(" anon backpointer is OK\n");
2025 } else {
2026 uobj = pg->uobject;
2027 if (uobj) {
2028 (*pr)(" checking object list\n");
2029 tpg = uvm_pagelookup(uobj, pg->offset);
2030 if (tpg)
2031 (*pr)(" page found on object list\n");
2032 else
2033 (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n");
2034 }
2035 }
2036 }
2037
2038 /* cross-verify page queue */
2039 if (pg->flags & PG_FREE) {
2040 int fl = uvm_page_get_freelist(pg);
2041 int b = uvm_page_get_bucket(pg);
2042 pgb = uvm.page_free[fl].pgfl_buckets[b];
2043 pgl = &pgb->pgb_colors[VM_PGCOLOR(pg)];
2044 (*pr)(" checking pageq list\n");
2045 LIST_FOREACH(tpg, pgl, pageq.list) {
2046 if (tpg == pg) {
2047 break;
2048 }
2049 }
2050 if (tpg)
2051 (*pr)(" page found on pageq list\n");
2052 else
2053 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
2054 }
2055 }
2056
2057 /*
2058 * uvm_page_printall - print a summary of all managed pages
2059 */
2060
2061 void
2062 uvm_page_printall(void (*pr)(const char *, ...))
2063 {
2064 uvm_physseg_t i;
2065 paddr_t pfn;
2066 struct vm_page *pg;
2067
2068 (*pr)("%18s %4s %4s %18s %18s"
2069 #ifdef UVM_PAGE_TRKOWN
2070 " OWNER"
2071 #endif
2072 "\n", "PAGE", "FLAG", "PQ", "UOBJECT", "UANON");
2073 for (i = uvm_physseg_get_first();
2074 uvm_physseg_valid_p(i);
2075 i = uvm_physseg_get_next(i)) {
2076 for (pfn = uvm_physseg_get_start(i);
2077 pfn < uvm_physseg_get_end(i);
2078 pfn++) {
2079 pg = PHYS_TO_VM_PAGE(ptoa(pfn));
2080
2081 (*pr)("%18p %04x %08x %18p %18p",
2082 pg, pg->flags, pg->pqflags, pg->uobject,
2083 pg->uanon);
2084 #ifdef UVM_PAGE_TRKOWN
2085 if (pg->flags & PG_BUSY)
2086 (*pr)(" %d [%s]", pg->owner, pg->owner_tag);
2087 #endif
2088 (*pr)("\n");
2089 }
2090 }
2091 }
2092
2093 /*
2094 * uvm_page_print_freelists - print a summary of the freelists
2095 */
2096
2097 void
2098 uvm_page_print_freelists(void (*pr)(const char *, ...))
2099 {
2100 struct pgfreelist *pgfl;
2101 struct pgflbucket *pgb;
2102 int fl, b, c;
2103
2104 (*pr)("There are %d freelists with %d buckets of %d colors.\n\n",
2105 VM_NFREELIST, uvm.bucketcount, uvmexp.ncolors);
2106
2107 for (fl = 0; fl < VM_NFREELIST; fl++) {
2108 pgfl = &uvm.page_free[fl];
2109 (*pr)("freelist(%d) @ %p\n", fl, pgfl);
2110 for (b = 0; b < uvm.bucketcount; b++) {
2111 pgb = uvm.page_free[fl].pgfl_buckets[b];
2112 (*pr)(" bucket(%d) @ %p, nfree = %d, lock @ %p:\n",
2113 b, pgb, pgb->pgb_nfree,
2114 &uvm_freelist_locks[b].lock);
2115 for (c = 0; c < uvmexp.ncolors; c++) {
2116 (*pr)(" color(%d) @ %p, ", c,
2117 &pgb->pgb_colors[c]);
2118 (*pr)("first page = %p\n",
2119 LIST_FIRST(&pgb->pgb_colors[c]));
2120 }
2121 }
2122 }
2123 }
2124
2125 #endif /* DDB || DEBUGPRINT */
2126