/*	$NetBSD: uvm_page.c,v 1.205 2019/12/16 22:47:55 ad Exp $	*/
2
3 /*
4 * Copyright (c) 1997 Charles D. Cranor and Washington University.
5 * Copyright (c) 1991, 1993, The Regents of the University of California.
6 *
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * The Mach Operating System project at Carnegie-Mellon University.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)vm_page.c 8.3 (Berkeley) 3/21/94
37 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
38 *
39 *
40 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
41 * All rights reserved.
42 *
43 * Permission to use, copy, modify and distribute this software and
44 * its documentation is hereby granted, provided that both the copyright
45 * notice and this permission notice appear in all copies of the
46 * software, derivative works or modified versions, and any portions
47 * thereof, and that both notices appear in supporting documentation.
48 *
49 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
50 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
51 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
52 *
53 * Carnegie Mellon requests users of this software to return to
54 *
55 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
56 * School of Computer Science
57 * Carnegie Mellon University
58 * Pittsburgh PA 15213-3890
59 *
60 * any improvements or extensions that they make and grant Carnegie the
61 * rights to redistribute these changes.
62 */
63
64 /*
65 * uvm_page.c: page ops.
66 */
67
68 #include <sys/cdefs.h>
69 __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.205 2019/12/16 22:47:55 ad Exp $");
70
71 #include "opt_ddb.h"
72 #include "opt_uvm.h"
73 #include "opt_uvmhist.h"
74 #include "opt_readahead.h"
75
76 #include <sys/param.h>
77 #include <sys/systm.h>
78 #include <sys/sched.h>
79 #include <sys/kernel.h>
80 #include <sys/vnode.h>
81 #include <sys/proc.h>
82 #include <sys/radixtree.h>
83 #include <sys/atomic.h>
84 #include <sys/cpu.h>
85 #include <sys/extent.h>
86
87 #include <uvm/uvm.h>
88 #include <uvm/uvm_ddb.h>
89 #include <uvm/uvm_pdpolicy.h>
90
/*
 * Some supported CPUs in a given architecture don't support all
 * of the things necessary to do idle page zero'ing efficiently.
 * We therefore provide a way to enable it from machdep code here.
 */
bool vm_page_zero_enable = false;

/*
 * Number of pages per-CPU to reserve for the kernel.  Summed into
 * uvmexp.reserve_kernel as CPUs attach (see uvm_cpu_attach()).
 */
#ifndef UVM_RESERVED_PAGES_PER_CPU
#define UVM_RESERVED_PAGES_PER_CPU 5
#endif
int vm_page_reserve_kernel = UVM_RESERVED_PAGES_PER_CPU;

/*
 * physical memory size;
 */
psize_t physmem;

/*
 * local variables
 */

/*
 * these variables record the values returned by vm_page_bootstrap,
 * for debugging purposes.  The implementation of uvm_pageboot_alloc
 * and pmap_startup here also uses them internally.
 */

static vaddr_t virtual_space_start;
static vaddr_t virtual_space_end;

/*
 * we allocate an initial number of page colors in uvm_page_init(),
 * and remember them.  We may re-color pages as cache sizes are
 * discovered during the autoconfiguration phase.  But we can never
 * free the initial set of buckets, since they are allocated using
 * uvm_pageboot_alloc().
 */

/* size of the most recent kmem_alloc()'d bucket array (0 = boot set) */
static size_t recolored_pages_memsize /* = 0 */;

#ifdef DEBUG
/* KVA reserved at boot for uvm_pagezerocheck()'s temporary mapping */
vaddr_t uvm_zerocheckkva;
#endif /* DEBUG */

/*
 * These functions are reserved for uvm(9) internal use and are not
 * exported in the header file uvm_physseg.h
 *
 * Thus they are redefined here.
 */
void uvm_physseg_init_seg(uvm_physseg_t, struct vm_page *);
void uvm_physseg_seg_chomp_slab(uvm_physseg_t, struct vm_page *, size_t);

/* returns a pgs array */
struct vm_page *uvm_physseg_seg_alloc_from_slab(uvm_physseg_t, size_t);

/*
 * local prototypes
 */

static int uvm_pageinsert(struct uvm_object *, struct vm_page *);
static void uvm_pageremove(struct uvm_object *, struct vm_page *);
156
157 /*
158 * inline functions
159 */
160
/*
 * uvm_pageinsert: insert a page in the object.
 *
 * => caller must lock object
 * => call should have already set pg's object and offset pointers
 *    and bumped the version counter
 */

/*
 * uvm_pageinsert_object: account a page into the object (uo_npages,
 * per-type page counters, PG_TABLED).  The radix tree linkage is done
 * separately by uvm_pageinsert_tree().
 */
static inline void
uvm_pageinsert_object(struct uvm_object *uobj, struct vm_page *pg)
{

	KASSERT(uobj == pg->uobject);
	KASSERT(mutex_owned(uobj->vmobjlock));
	KASSERT((pg->flags & PG_TABLED) == 0);

	if (UVM_OBJ_IS_VNODE(uobj)) {
		if (uobj->uo_npages == 0) {
			struct vnode *vp = (struct vnode *)uobj;

			/* first page in a vnode object: add a hold */
			vholdl(vp);
		}
		if (UVM_OBJ_IS_VTEXT(uobj)) {
			cpu_count(CPU_COUNT_EXECPAGES, 1);
		} else {
			cpu_count(CPU_COUNT_FILEPAGES, 1);
		}
	} else if (UVM_OBJ_IS_AOBJ(uobj)) {
		cpu_count(CPU_COUNT_ANONPAGES, 1);
	}
	pg->flags |= PG_TABLED;
	uobj->uo_npages++;
}
194
195 static inline int
196 uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg)
197 {
198 const uint64_t idx = pg->offset >> PAGE_SHIFT;
199 int error;
200
201 error = radix_tree_insert_node(&uobj->uo_pages, idx, pg);
202 if (error != 0) {
203 return error;
204 }
205 return 0;
206 }
207
208 static inline int
209 uvm_pageinsert(struct uvm_object *uobj, struct vm_page *pg)
210 {
211 int error;
212
213 KDASSERT(uobj != NULL);
214 KDASSERT(uobj == pg->uobject);
215 error = uvm_pageinsert_tree(uobj, pg);
216 if (error != 0) {
217 KASSERT(error == ENOMEM);
218 return error;
219 }
220 uvm_pageinsert_object(uobj, pg);
221 return error;
222 }
223
/*
 * uvm_page_remove: remove page from object.
 *
 * => caller must lock object
 */

/*
 * uvm_pageremove_object: undo uvm_pageinsert_object(): drop the page
 * from the object's accounting and clear its object linkage.
 */
static inline void
uvm_pageremove_object(struct uvm_object *uobj, struct vm_page *pg)
{

	KASSERT(uobj == pg->uobject);
	KASSERT(mutex_owned(uobj->vmobjlock));
	KASSERT(pg->flags & PG_TABLED);

	if (UVM_OBJ_IS_VNODE(uobj)) {
		if (uobj->uo_npages == 1) {
			struct vnode *vp = (struct vnode *)uobj;

			/* last page leaving the vnode: release the hold */
			holdrelel(vp);
		}
		if (UVM_OBJ_IS_VTEXT(uobj)) {
			cpu_count(CPU_COUNT_EXECPAGES, -1);
		} else {
			cpu_count(CPU_COUNT_FILEPAGES, -1);
		}
	} else if (UVM_OBJ_IS_AOBJ(uobj)) {
		cpu_count(CPU_COUNT_ANONPAGES, -1);
	}

	/* object should be locked */
	uobj->uo_npages--;
	pg->flags &= ~PG_TABLED;
	pg->uobject = NULL;
}
258
259 static inline void
260 uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg)
261 {
262 struct vm_page *opg __unused;
263
264 opg = radix_tree_remove_node(&uobj->uo_pages, pg->offset >> PAGE_SHIFT);
265 KASSERT(pg == opg);
266 }
267
268 static inline void
269 uvm_pageremove(struct uvm_object *uobj, struct vm_page *pg)
270 {
271
272 KDASSERT(uobj != NULL);
273 KASSERT(uobj == pg->uobject);
274 uvm_pageremove_object(uobj, pg);
275 uvm_pageremove_tree(uobj, pg);
276 }
277
278 static void
279 uvm_page_init_buckets(struct pgfreelist *pgfl)
280 {
281 int color, i;
282
283 for (color = 0; color < uvmexp.ncolors; color++) {
284 for (i = 0; i < PGFL_NQUEUES; i++) {
285 LIST_INIT(&pgfl->pgfl_buckets[color].pgfl_queues[i]);
286 }
287 }
288 }
289
/*
 * uvm_page_init: init the page system.  called from uvm_init().
 *
 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
 * => runs single-threaded at boot (asserted via ncpu <= 1); the MD code
 *    must already have registered free RAM with uvm_page_physload()
 */

void
uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
{
	static struct uvm_cpu boot_cpu;
	psize_t freepages, pagecount, bucketcount, n;
	struct pgflbucket *bucketarray, *cpuarray;
	struct vm_page *pagearray;
	uvm_physseg_t bank;
	int lcv;

	KASSERT(ncpu <= 1);
	CTASSERT(sizeof(pagearray->offset) >= sizeof(struct uvm_cpu *));

	/*
	 * init the page queues and free page queue lock, except the
	 * free list; we allocate that later (with the initial vm_page
	 * structures).
	 */

	uvm.cpus[0] = &boot_cpu;
	curcpu()->ci_data.cpu_uvm = &boot_cpu;
	uvmpdpol_init();
	mutex_init(&uvm_fpageqlock, MUTEX_DRIVER, IPL_VM);

	/*
	 * allocate vm_page structures.
	 */

	/*
	 * sanity check:
	 * before calling this function the MD code is expected to register
	 * some free RAM with the uvm_page_physload() function.  our job
	 * now is to allocate vm_page structures for this memory.
	 */

	if (uvm_physseg_get_last() == UVM_PHYSSEG_TYPE_INVALID)
		panic("uvm_page_bootstrap: no memory pre-allocated");

	/*
	 * first calculate the number of free pages...
	 *
	 * note that we use start/end rather than avail_start/avail_end.
	 * this allows us to allocate extra vm_page structures in case we
	 * want to return some memory to the pool after booting.
	 */

	freepages = 0;

	for (bank = uvm_physseg_get_first();
	     uvm_physseg_valid_p(bank) ;
	     bank = uvm_physseg_get_next(bank)) {
		freepages += (uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank));
	}

	/*
	 * Let MD code initialize the number of colors, or default
	 * to 1 color if MD code doesn't care.
	 */
	if (uvmexp.ncolors == 0)
		uvmexp.ncolors = 1;
	uvmexp.colormask = uvmexp.ncolors - 1;
	/* ncolors must be a power of two for the mask to work */
	KASSERT((uvmexp.colormask & uvmexp.ncolors) == 0);

	/*
	 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
	 * use.  for each page of memory we use we need a vm_page structure.
	 * thus, the total number of pages we can use is the total size of
	 * the memory divided by the PAGE_SIZE plus the size of the vm_page
	 * structure.  we add one to freepages as a fudge factor to avoid
	 * truncation errors (since we can only allocate in terms of whole
	 * pages).
	 */

	bucketcount = uvmexp.ncolors * VM_NFREELIST;
	pagecount = ((freepages + 1) << PAGE_SHIFT) /
	    (PAGE_SIZE + sizeof(struct vm_page));

	/*
	 * One bootstrap allocation covers both bucket arrays (global +
	 * boot CPU, hence the "* 2") and the vm_page array.
	 */
	bucketarray = (void *)uvm_pageboot_alloc((bucketcount *
	    sizeof(struct pgflbucket) * 2) + (pagecount *
	    sizeof(struct vm_page)));
	cpuarray = bucketarray + bucketcount;
	pagearray = (struct vm_page *)(bucketarray + bucketcount * 2);

	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
		uvm.page_free[lcv].pgfl_buckets =
		    (bucketarray + (lcv * uvmexp.ncolors));
		uvm_page_init_buckets(&uvm.page_free[lcv]);
		uvm.cpus[0]->page_free[lcv].pgfl_buckets =
		    (cpuarray + (lcv * uvmexp.ncolors));
		uvm_page_init_buckets(&uvm.cpus[0]->page_free[lcv]);
	}
	memset(pagearray, 0, pagecount * sizeof(struct vm_page));

	/*
	 * init the vm_page structures and put them in the correct place.
	 */
	/* First init the extent */

	for (bank = uvm_physseg_get_first(),
		 uvm_physseg_seg_chomp_slab(bank, pagearray, pagecount);
	     uvm_physseg_valid_p(bank);
	     bank = uvm_physseg_get_next(bank)) {

		n = uvm_physseg_get_end(bank) - uvm_physseg_get_start(bank);
		uvm_physseg_seg_alloc_from_slab(bank, n);
		uvm_physseg_init_seg(bank, pagearray);

		/* set up page array pointers */
		pagearray += n;
		pagecount -= n;
	}

	/*
	 * pass up the values of virtual_space_start and
	 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
	 * layers of the VM.
	 */

	*kvm_startp = round_page(virtual_space_start);
	*kvm_endp = trunc_page(virtual_space_end);
#ifdef DEBUG
	/*
	 * steal kva for uvm_pagezerocheck().
	 */
	uvm_zerocheckkva = *kvm_startp;
	*kvm_startp += PAGE_SIZE;
#endif /* DEBUG */

	/*
	 * init various thresholds.
	 */

	uvmexp.reserve_pagedaemon = 1;
	uvmexp.reserve_kernel = vm_page_reserve_kernel;

	/*
	 * determine if we should zero pages in the idle loop.
	 */

	uvm.cpus[0]->page_idle_zero = vm_page_zero_enable;

	/*
	 * done!
	 */

	uvm.page_init_done = true;
}
443
444 /*
445 * uvm_setpagesize: set the page size
446 *
447 * => sets page_shift and page_mask from uvmexp.pagesize.
448 */
449
450 void
451 uvm_setpagesize(void)
452 {
453
454 /*
455 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE
456 * to be a constant (indicated by being a non-zero value).
457 */
458 if (uvmexp.pagesize == 0) {
459 if (PAGE_SIZE == 0)
460 panic("uvm_setpagesize: uvmexp.pagesize not set");
461 uvmexp.pagesize = PAGE_SIZE;
462 }
463 uvmexp.pagemask = uvmexp.pagesize - 1;
464 if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
465 panic("uvm_setpagesize: page size %u (%#x) not a power of two",
466 uvmexp.pagesize, uvmexp.pagesize);
467 for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
468 if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
469 break;
470 }
471
/*
 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
 *
 * => "size" is rounded up to a whole number of pages
 * => returns the KVA of the allocation; panics on exhaustion
 * => with PMAP_STEAL_MEMORY the work is delegated entirely to MD code
 */

vaddr_t
uvm_pageboot_alloc(vsize_t size)
{
	static bool initialized = false;
	vaddr_t addr;
#if !defined(PMAP_STEAL_MEMORY)
	vaddr_t vaddr;
	paddr_t paddr;
#endif

	/*
	 * on first call to this function, initialize ourselves.
	 */
	if (initialized == false) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/* round it the way we like it */
		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);

		initialized = true;
	}

	/* round to page size */
	size = round_page(size);
	uvmexp.bootpages += atop(size);

#if defined(PMAP_STEAL_MEMORY)

	/*
	 * defer bootstrap allocation to MD code (it may want to allocate
	 * from a direct-mapped segment).  pmap_steal_memory should adjust
	 * virtual_space_start/virtual_space_end if necessary.
	 */

	addr = pmap_steal_memory(size, &virtual_space_start,
	    &virtual_space_end);

	return(addr);

#else /* !PMAP_STEAL_MEMORY */

	/*
	 * allocate virtual memory for this request
	 */
	if (virtual_space_start == virtual_space_end ||
	    (virtual_space_end - virtual_space_start) < size)
		panic("uvm_pageboot_alloc: out of virtual space");

	addr = virtual_space_start;

#ifdef PMAP_GROWKERNEL
	/*
	 * If the kernel pmap can't map the requested space,
	 * then allocate more resources for it.
	 */
	if (uvm_maxkaddr < (addr + size)) {
		uvm_maxkaddr = pmap_growkernel(addr + size);
		if (uvm_maxkaddr < (addr + size))
			panic("uvm_pageboot_alloc: pmap_growkernel() failed");
	}
#endif

	virtual_space_start += size;

	/*
	 * allocate and mapin physical pages to back new virtual pages
	 */

	for (vaddr = round_page(addr) ; vaddr < addr + size ;
	    vaddr += PAGE_SIZE) {

		/* grab one physical page at a time from the segments */
		if (!uvm_page_physget(&paddr))
			panic("uvm_pageboot_alloc: out of memory");

		/*
		 * Note this memory is no longer managed, so using
		 * pmap_kenter is safe.
		 */
		pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
	}
	pmap_update(pmap_kernel());
	return(addr);
#endif /* PMAP_STEAL_MEMORY */
}
561
562 #if !defined(PMAP_STEAL_MEMORY)
/*
 * uvm_page_physget: "steal" one page from the vm_physmem structure.
 *
 * => attempt to allocate it off the end of a segment in which the "avail"
 *    values match the start/end values.  if we can't do that, then we
 *    will advance both values (making them equal, and removing some
 *    vm_page structures from the non-avail area).
 * => return false if out of memory.
 */

/* subroutine: try to allocate from memory chunks on the specified freelist */
static bool uvm_page_physget_freelist(paddr_t *, int);

/*
 * uvm_page_physget_freelist: two-pass scan of the physical segments.
 * Pass 1 only takes pages off an unused segment end; pass 2 forces a
 * page out of any segment on the freelist.  Segment iteration order
 * depends on VM_PHYSSEG_STRAT (biggest-first walks backwards).
 */
static bool
uvm_page_physget_freelist(paddr_t *paddrp, int freelist)
{
	uvm_physseg_t lcv;

	/* pass 1: try allocating from a matching end */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
	for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv))
#else
	for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv))
#endif
	{
		/* only legal before the page system is fully up */
		if (uvm.page_init_done == true)
			panic("uvm_page_physget: called _after_ bootstrap");

		/* Try to match at front or back on unused segment */
		if (uvm_page_physunload(lcv, freelist, paddrp))
			return true;
	}

	/* pass2: forget about matching ends, just allocate something */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
	for (lcv = uvm_physseg_get_last(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_prev(lcv))
#else
	for (lcv = uvm_physseg_get_first(); uvm_physseg_valid_p(lcv); lcv = uvm_physseg_get_next(lcv))
#endif
	{
		/* Try the front regardless. */
		if (uvm_page_physunload_force(lcv, freelist, paddrp))
			return true;
	}
	return false;
}
609
610 bool
611 uvm_page_physget(paddr_t *paddrp)
612 {
613 int i;
614
615 /* try in the order of freelist preference */
616 for (i = 0; i < VM_NFREELIST; i++)
617 if (uvm_page_physget_freelist(paddrp, i) == true)
618 return (true);
619 return (false);
620 }
621 #endif /* PMAP_STEAL_MEMORY */
622
623 /*
624 * PHYS_TO_VM_PAGE: find vm_page for a PA. used by MI code to get vm_pages
625 * back from an I/O mapping (ugh!). used in some MD code as well.
626 */
627 struct vm_page *
628 uvm_phys_to_vm_page(paddr_t pa)
629 {
630 paddr_t pf = atop(pa);
631 paddr_t off;
632 uvm_physseg_t upm;
633
634 upm = uvm_physseg_find(pf, &off);
635 if (upm != UVM_PHYSSEG_TYPE_INVALID)
636 return uvm_physseg_get_pg(upm, off);
637 return(NULL);
638 }
639
640 paddr_t
641 uvm_vm_page_to_phys(const struct vm_page *pg)
642 {
643
644 return pg->phys_addr;
645 }
646
/*
 * uvm_page_recolor: Recolor the pages if the new bucket count is
 * larger than the old one.
 *
 * => allocates the new bucket arrays before taking uvm_fpageqlock,
 *    then migrates every free page into its new color bucket
 * => the boot-time bucket array (recolored_pages_memsize == 0) is
 *    never freed: it came from uvm_pageboot_alloc()
 */

void
uvm_page_recolor(int newncolors)
{
	struct pgflbucket *bucketarray, *cpuarray, *oldbucketarray;
	struct pgfreelist gpgfl, pgfl;
	struct vm_page *pg;
	vsize_t bucketcount;
	size_t bucketmemsize, oldbucketmemsize;
	int color, i, ocolors;
	int lcv;
	struct uvm_cpu *ucpu;

	/* color counts must be powers of two */
	KASSERT(((newncolors - 1) & newncolors) == 0);

	if (newncolors <= uvmexp.ncolors)
		return;

	if (uvm.page_init_done == false) {
		/* too early: just record the request for uvm_page_init() */
		uvmexp.ncolors = newncolors;
		return;
	}

	/* one array each for the global and the current CPU's freelists */
	bucketcount = newncolors * VM_NFREELIST;
	bucketmemsize = bucketcount * sizeof(struct pgflbucket) * 2;
	bucketarray = kmem_alloc(bucketmemsize, KM_SLEEP);
	cpuarray = bucketarray + bucketcount;

	mutex_spin_enter(&uvm_fpageqlock);

	/* Make sure we should still do this. */
	if (newncolors <= uvmexp.ncolors) {
		mutex_spin_exit(&uvm_fpageqlock);
		kmem_free(bucketarray, bucketmemsize);
		return;
	}

	oldbucketarray = uvm.page_free[0].pgfl_buckets;
	ocolors = uvmexp.ncolors;

	uvmexp.ncolors = newncolors;
	uvmexp.colormask = uvmexp.ncolors - 1;

	/*
	 * NOTE(review): only the calling CPU's per-CPU freelist is
	 * rebuilt here — presumably recoloring happens before other
	 * CPUs attach; confirm against uvm_cpu_attach() callers.
	 */
	ucpu = curcpu()->ci_data.cpu_uvm;
	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
		gpgfl.pgfl_buckets = (bucketarray + (lcv * newncolors));
		pgfl.pgfl_buckets = (cpuarray + (lcv * uvmexp.ncolors));
		uvm_page_init_buckets(&gpgfl);
		uvm_page_init_buckets(&pgfl);
		/* move every free page into its bucket under the new mask */
		for (color = 0; color < ocolors; color++) {
			for (i = 0; i < PGFL_NQUEUES; i++) {
				while ((pg = LIST_FIRST(&uvm.page_free[
				    lcv].pgfl_buckets[color].pgfl_queues[i]))
				    != NULL) {
					LIST_REMOVE(pg, pageq.list); /* global */
					LIST_REMOVE(pg, listq.list); /* cpu */
					LIST_INSERT_HEAD(&gpgfl.pgfl_buckets[
					    VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
					    i], pg, pageq.list);
					LIST_INSERT_HEAD(&pgfl.pgfl_buckets[
					    VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
					    i], pg, listq.list);
				}
			}
		}
		uvm.page_free[lcv].pgfl_buckets = gpgfl.pgfl_buckets;
		ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
	}

	oldbucketmemsize = recolored_pages_memsize;

	recolored_pages_memsize = bucketmemsize;
	mutex_spin_exit(&uvm_fpageqlock);

	/* free the previous kmem_alloc()'d array, but never the boot set */
	if (oldbucketmemsize) {
		kmem_free(oldbucketarray, oldbucketmemsize);
	}

	/*
	 * this calls uvm_km_alloc() which may want to hold
	 * uvm_fpageqlock.
	 */
	uvm_pager_realloc_emerg();
}
735
/*
 * uvm_cpu_attach: initialize per-CPU data structures.
 *
 * => for the boot CPU only the RNG source is attached here; its
 *    freelists were set up by uvm_page_init()
 * => secondary CPUs get their own bucket arrays and a share of the
 *    kernel page reserve
 */

void
uvm_cpu_attach(struct cpu_info *ci)
{
	struct pgflbucket *bucketarray;
	struct pgfreelist pgfl;
	struct uvm_cpu *ucpu;
	vsize_t bucketcount;
	int lcv;

	if (CPU_IS_PRIMARY(ci)) {
		/* Already done in uvm_page_init(). */
		goto attachrnd;
	}

	/* Add more reserve pages for this CPU. */
	uvmexp.reserve_kernel += vm_page_reserve_kernel;

	/* Configure this CPU's free lists. */
	bucketcount = uvmexp.ncolors * VM_NFREELIST;
	bucketarray = kmem_alloc(bucketcount * sizeof(struct pgflbucket),
	    KM_SLEEP);
	ucpu = kmem_zalloc(sizeof(*ucpu), KM_SLEEP);
	uvm.cpus[cpu_index(ci)] = ucpu;
	ci->ci_data.cpu_uvm = ucpu;
	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
		pgfl.pgfl_buckets = (bucketarray + (lcv * uvmexp.ncolors));
		uvm_page_init_buckets(&pgfl);
		ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
	}

 attachrnd:
	/*
	 * Attach RNG source for this CPU's VM events
	 */
	rnd_attach_source(&uvm.cpus[cpu_index(ci)]->rs,
	    ci->ci_data.cpu_name, RND_TYPE_VM,
	    RND_FLAG_COLLECT_TIME|RND_FLAG_COLLECT_VALUE|
	    RND_FLAG_ESTIMATE_VALUE);

}
780
781 /*
782 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat
783 */
784
785 static struct vm_page *
786 uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int flist, int try1, int try2,
787 int *trycolorp)
788 {
789 struct pgflist *freeq;
790 struct vm_page *pg;
791 int color, trycolor = *trycolorp;
792 struct pgfreelist *gpgfl, *pgfl;
793
794 KASSERT(mutex_owned(&uvm_fpageqlock));
795
796 color = trycolor;
797 pgfl = &ucpu->page_free[flist];
798 gpgfl = &uvm.page_free[flist];
799 do {
800 /* cpu, try1 */
801 if ((pg = LIST_FIRST((freeq =
802 &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) {
803 KASSERT(pg->flags & PG_FREE);
804 KASSERT(try1 == PGFL_ZEROS || !(pg->flags & PG_ZERO));
805 KASSERT(try1 == PGFL_UNKNOWN || (pg->flags & PG_ZERO));
806 KASSERT(ucpu == VM_FREE_PAGE_TO_CPU(pg));
807 VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--;
808 CPU_COUNT(CPU_COUNT_CPUHIT, 1);
809 goto gotit;
810 }
811 /* global, try1 */
812 if ((pg = LIST_FIRST((freeq =
813 &gpgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) {
814 KASSERT(pg->flags & PG_FREE);
815 KASSERT(try1 == PGFL_ZEROS || !(pg->flags & PG_ZERO));
816 KASSERT(try1 == PGFL_UNKNOWN || (pg->flags & PG_ZERO));
817 KASSERT(ucpu != VM_FREE_PAGE_TO_CPU(pg));
818 VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--;
819 CPU_COUNT(CPU_COUNT_CPUMISS, 1);
820 goto gotit;
821 }
822 /* cpu, try2 */
823 if ((pg = LIST_FIRST((freeq =
824 &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) {
825 KASSERT(pg->flags & PG_FREE);
826 KASSERT(try2 == PGFL_ZEROS || !(pg->flags & PG_ZERO));
827 KASSERT(try2 == PGFL_UNKNOWN || (pg->flags & PG_ZERO));
828 KASSERT(ucpu == VM_FREE_PAGE_TO_CPU(pg));
829 VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--;
830 CPU_COUNT(CPU_COUNT_CPUHIT, 1);
831 goto gotit;
832 }
833 /* global, try2 */
834 if ((pg = LIST_FIRST((freeq =
835 &gpgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) {
836 KASSERT(pg->flags & PG_FREE);
837 KASSERT(try2 == PGFL_ZEROS || !(pg->flags & PG_ZERO));
838 KASSERT(try2 == PGFL_UNKNOWN || (pg->flags & PG_ZERO));
839 KASSERT(ucpu != VM_FREE_PAGE_TO_CPU(pg));
840 VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--;
841 CPU_COUNT(CPU_COUNT_CPUMISS, 1);
842 goto gotit;
843 }
844 color = (color + 1) & uvmexp.colormask;
845 } while (color != trycolor);
846
847 return (NULL);
848
849 gotit:
850 LIST_REMOVE(pg, pageq.list); /* global list */
851 LIST_REMOVE(pg, listq.list); /* per-cpu list */
852 uvmexp.free--;
853
854 /* update zero'd page count */
855 if (pg->flags & PG_ZERO)
856 CPU_COUNT(CPU_COUNT_ZEROPAGES, -1);
857
858 if (color == trycolor)
859 CPU_COUNT(CPU_COUNT_COLORHIT, 1);
860 else {
861 CPU_COUNT(CPU_COUNT_COLORMISS, 1);
862 *trycolorp = color;
863 }
864
865 return (pg);
866 }
867
/*
 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
 *
 * => return null if no pages free
 * => wake up pagedaemon if number of free pages drops below low water mark
 * => if obj != NULL, obj must be locked (to put in obj's tree)
 * => if anon != NULL, anon must be locked (to put in anon)
 * => only one of obj or anon can be non-null
 * => caller must activate/deactivate page if it is not wired.
 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
 * => policy decision: it is more important to pull a page off of the
 *	appropriate priority free list than it is to get a zero'd or
 *	unknown contents page.  This is because we live with the
 *	consequences of a bad free list decision for the entire
 *	lifetime of the page, e.g. if the page comes from memory that
 *	is slower to access.
 */

struct vm_page *
uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
    int flags, int strat, int free_list)
{
	int try1, try2, zeroit = 0, color;
	int lcv, error;
	struct uvm_cpu *ucpu;
	struct vm_page *pg;
	lwp_t *l;

	KASSERT(obj == NULL || anon == NULL);
	KASSERT(anon == NULL || (flags & UVM_FLAG_COLORMATCH) || off == 0);
	KASSERT(off == trunc_page(off));
	KASSERT(obj == NULL || mutex_owned(obj->vmobjlock));
	KASSERT(anon == NULL || anon->an_lock == NULL ||
	    mutex_owned(anon->an_lock));

	mutex_spin_enter(&uvm_fpageqlock);

	/*
	 * This implements a global round-robin page coloring
	 * algorithm.
	 */

	ucpu = curcpu()->ci_data.cpu_uvm;
	if (flags & UVM_FLAG_COLORMATCH) {
		/* caller wants the color implied by the offset */
		color = atop(off) & uvmexp.colormask;
	} else {
		color = ucpu->page_free_nextcolor;
	}

	/*
	 * check to see if we need to generate some free pages waking
	 * the pagedaemon.
	 */

	uvm_kick_pdaemon();

	/*
	 * fail if any of these conditions is true:
	 * [1]  there really are no free pages, or
	 * [2]  only kernel "reserved" pages remain and
	 *        reserved pages have not been requested.
	 * [3]  only pagedaemon "reserved" pages remain and
	 *        the requestor isn't the pagedaemon.
	 * we make kernel reserve pages available if called by a
	 * kernel thread or a realtime thread.
	 */
	l = curlwp;
	if (__predict_true(l != NULL) && lwp_eprio(l) >= PRI_KTHREAD) {
		flags |= UVM_PGA_USERESERVE;
	}
	if ((uvmexp.free <= uvmexp.reserve_kernel &&
	    (flags & UVM_PGA_USERESERVE) == 0) ||
	    (uvmexp.free <= uvmexp.reserve_pagedaemon &&
	    curlwp != uvm.pagedaemon_lwp))
		goto fail;

#if PGFL_NQUEUES != 2
#error uvm_pagealloc_strat needs to be updated
#endif

	/*
	 * If we want a zero'd page, try the ZEROS queue first, otherwise
	 * we try the UNKNOWN queue first.
	 */
	if (flags & UVM_PGA_ZERO) {
		try1 = PGFL_ZEROS;
		try2 = PGFL_UNKNOWN;
	} else {
		try1 = PGFL_UNKNOWN;
		try2 = PGFL_ZEROS;
	}

 again:
	switch (strat) {
	case UVM_PGA_STRAT_NORMAL:
		/* Check freelists: descending priority (ascending id) order */
		for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
			pg = uvm_pagealloc_pgfl(ucpu, lcv,
			    try1, try2, &color);
			if (pg != NULL)
				goto gotit;
		}

		/* No pages free! */
		goto fail;

	case UVM_PGA_STRAT_ONLY:
	case UVM_PGA_STRAT_FALLBACK:
		/* Attempt to allocate from the specified free list. */
		KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
		pg = uvm_pagealloc_pgfl(ucpu, free_list,
		    try1, try2, &color);
		if (pg != NULL)
			goto gotit;

		/* Fall back, if possible. */
		if (strat == UVM_PGA_STRAT_FALLBACK) {
			strat = UVM_PGA_STRAT_NORMAL;
			goto again;
		}

		/* No pages free! */
		goto fail;

	default:
		panic("uvm_pagealloc_strat: bad strat %d", strat);
		/* NOTREACHED */
	}

 gotit:
	/*
	 * We now know which color we actually allocated from; set
	 * the next color accordingly.
	 */

	ucpu->page_free_nextcolor = (color + 1) & uvmexp.colormask;

	/*
	 * update allocation statistics and remember if we have to
	 * zero the page
	 */

	if (flags & UVM_PGA_ZERO) {
		if (pg->flags & PG_ZERO) {
			CPU_COUNT(CPU_COUNT_PGA_ZEROHIT, 1);
			zeroit = 0;
		} else {
			CPU_COUNT(CPU_COUNT_PGA_ZEROMISS, 1);
			zeroit = 1;
		}
		/* running low on pre-zeroed pages: re-enable idle zeroing */
		if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) {
			ucpu->page_idle_zero = vm_page_zero_enable;
		}
	}
	KASSERT((pg->flags & ~(PG_ZERO|PG_FREE)) == 0);

	/*
	 * For now check this - later on we may do lazy dequeue, but need
	 * to get page.queue used only by the pagedaemon policy first.
	 */
	KASSERT(!uvmpdpol_pageisqueued_p(pg));

	/*
	 * assign the page to the object.  we don't need to lock the page's
	 * identity to do this, as the caller holds the objects locked, and
	 * the page is not on any paging queues at this time.
	 */
	pg->offset = off;
	pg->uobject = obj;
	pg->uanon = anon;
	KASSERT(uvm_page_locked_p(pg));
	pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
	mutex_spin_exit(&uvm_fpageqlock);
	if (anon) {
		anon->an_page = pg;
		pg->flags |= PG_ANON;
		cpu_count(CPU_COUNT_ANONPAGES, 1);
	} else if (obj) {
		error = uvm_pageinsert(obj, pg);
		if (error != 0) {
			/* tree insertion failed (ENOMEM): undo and bail */
			pg->uobject = NULL;
			uvm_pagefree(pg);
			return NULL;
		}
	}

#if defined(UVM_PAGE_TRKOWN)
	pg->owner_tag = NULL;
#endif
	UVM_PAGE_OWN(pg, "new alloc");

	if (flags & UVM_PGA_ZERO) {
		/*
		 * A zero'd page is not clean.  If we got a page not already
		 * zero'd, then we have to zero it ourselves.
		 */
		pg->flags &= ~PG_CLEAN;
		if (zeroit)
			pmap_zero_page(VM_PAGE_TO_PHYS(pg));
	}

	return(pg);

 fail:
	mutex_spin_exit(&uvm_fpageqlock);
	return (NULL);
}
1075
/*
 * uvm_pagereplace: replace a page with another
 *
 * => object must be locked
 * => interlock must be held
 * => newpg takes over oldpg's offset in the object
 */

void
uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg)
{
	struct uvm_object *uobj = oldpg->uobject;

	KASSERT((oldpg->flags & PG_TABLED) != 0);
	KASSERT(uobj != NULL);
	KASSERT((newpg->flags & PG_TABLED) == 0);
	KASSERT(newpg->uobject == NULL);
	KASSERT(mutex_owned(uobj->vmobjlock));

	newpg->uobject = uobj;
	newpg->offset = oldpg->offset;

	/*
	 * Insert the new page into the object accounting before removing
	 * the old one, so uo_npages never transiently reaches the 1->0
	 * boundary that would drop and re-take the vnode hold in
	 * uvm_pageremove_object()/uvm_pageinsert_object().
	 *
	 * NOTE(review): uvm_pageinsert_tree() can return ENOMEM and the
	 * result is ignored here — presumably re-inserting right after
	 * removing a node cannot fail, but confirm against the
	 * radixtree implementation.
	 */
	uvm_pageremove_tree(uobj, oldpg);
	uvm_pageinsert_tree(uobj, newpg);
	uvm_pageinsert_object(uobj, newpg);
	uvm_pageremove_object(uobj, oldpg);
}
1102
1103 /*
1104 * uvm_pagerealloc: reallocate a page from one object to another
1105 *
1106 * => both objects must be locked
1107 * => both interlocks must be held
1108 */
1109
1110 void
1111 uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
1112 {
1113 /*
1114 * remove it from the old object
1115 */
1116
1117 if (pg->uobject) {
1118 uvm_pageremove(pg->uobject, pg);
1119 }
1120
1121 /*
1122 * put it in the new object
1123 */
1124
1125 if (newobj) {
1126 /*
1127 * XXX we have no in-tree users of this functionality
1128 */
1129 panic("uvm_pagerealloc: no impl");
1130 }
1131 }
1132
1133 #ifdef DEBUG
1134 /*
1135 * check if page is zero-filled
1136 */
void
uvm_pagezerocheck(struct vm_page *pg)
{
	int *p, *ep;

	KASSERT(uvm_zerocheckkva != 0);
	KASSERT(mutex_owned(&uvm_fpageqlock));

	/*
	 * XXX assuming pmap_kenter_pa and pmap_kremove never call
	 * uvm page allocator.
	 *
	 * it might be better to have "CPU-local temporary map" pmap interface.
	 */
	/* map the page read-only at the reserved checking VA. */
	pmap_kenter_pa(uvm_zerocheckkva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ, 0);
	p = (int *)uvm_zerocheckkva;
	ep = (int *)((char *)p + PAGE_SIZE);
	/* flush the pmap so the new mapping is valid before we read it. */
	pmap_update(pmap_kernel());
	/* scan the page one int at a time; any non-zero word is fatal. */
	while (p < ep) {
		if (*p != 0)
			panic("PG_ZERO page isn't zero-filled");
		p++;
	}
	pmap_kremove(uvm_zerocheckkva, PAGE_SIZE);
	/*
	 * pmap_update() is not necessary here because no one except us
	 * uses this VA.
	 */
}
1166 #endif /* DEBUG */
1167
1168 /*
1169 * uvm_pagefree: free page
1170 *
1171 * => erase page's identity (i.e. remove from object)
1172 * => put page on free list
1173 * => caller must lock owning object (either anon or uvm_object)
1174 * => assumes all valid mappings of pg are gone
1175 */
1176
void
uvm_pagefree(struct vm_page *pg)
{
	struct pgflist *pgfl;
	struct uvm_cpu *ucpu;
	int index, color, queue;
	bool iszero, locked;

#ifdef DEBUG
	/* free pages have poisoned identity pointers; catch double free. */
	if (pg->uobject == (void *)0xdeadbeef &&
	    pg->uanon == (void *)0xdeadbeef) {
		panic("uvm_pagefree: freeing free page %p", pg);
	}
#endif /* DEBUG */

	KASSERT((pg->flags & PG_PAGEOUT) == 0);
	KASSERT(!(pg->flags & PG_FREE));
	//KASSERT(mutex_owned(&uvm_pageqlock) || !uvmpdpol_pageisqueued_p(pg));
	KASSERT(pg->uobject == NULL || mutex_owned(pg->uobject->vmobjlock));
	KASSERT(pg->uobject != NULL || pg->uanon == NULL ||
	    mutex_owned(pg->uanon->an_lock));

	/*
	 * if the page is loaned, resolve the loan instead of freeing.
	 */

	if (pg->loan_count) {
		KASSERT(pg->wire_count == 0);

		/*
		 * if the page is owned by an anon then we just want to
		 * drop anon ownership. the kernel will free the page when
		 * it is done with it. if the page is owned by an object,
		 * remove it from the object and mark it dirty for the benefit
		 * of possible anon owners.
		 *
		 * regardless of previous ownership, wakeup any waiters,
		 * unbusy the page, and we're done.
		 */

		mutex_enter(&pg->interlock);
		locked = true;
		if (pg->uobject != NULL) {
			uvm_pageremove(pg->uobject, pg);
			pg->flags &= ~PG_CLEAN;
		} else if (pg->uanon != NULL) {
			if ((pg->flags & PG_ANON) == 0) {
				/* anon was a borrower, not the owner. */
				pg->loan_count--;
			} else {
				pg->flags &= ~PG_ANON;
				cpu_count(CPU_COUNT_ANONPAGES, -1);
			}
			pg->uanon->an_page = NULL;
			pg->uanon = NULL;
		}
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1);
#ifdef UVM_PAGE_TRKOWN
		pg->owner_tag = NULL;
#endif
		if (pg->loan_count) {
			/*
			 * the page is still on loan: the remaining
			 * borrower(s) keep it alive, so don't free it.
			 */
			KASSERT(pg->uobject == NULL);
			mutex_exit(&pg->interlock);
			if (pg->uanon == NULL) {
				uvm_pagedequeue(pg);
			}
			return;
		}
	} else if (pg->uobject != NULL || pg->uanon != NULL ||
	    pg->wire_count != 0) {
		mutex_enter(&pg->interlock);
		locked = true;
	} else {
		/* no identity and not wired: the interlock is not needed. */
		locked = false;
	}

	/*
	 * remove page from its object or anon.
	 */
	if (pg->uobject != NULL) {
		uvm_pageremove(pg->uobject, pg);
	} else if (pg->uanon != NULL) {
		pg->uanon->an_page = NULL;
		pg->uanon = NULL;
		cpu_count(CPU_COUNT_ANONPAGES, -1);
	}

	/*
	 * if the page was wired, unwire it now.
	 */

	if (pg->wire_count) {
		pg->wire_count = 0;
		atomic_dec_uint(&uvmexp.wired);
	}
	if (locked) {
		mutex_exit(&pg->interlock);
	}

	/*
	 * now remove the page from the queues.
	 */
	uvm_pagedequeue(pg);

	/*
	 * and put on free queue
	 */

	iszero = (pg->flags & PG_ZERO);
	index = uvm_page_lookup_freelist(pg);
	color = VM_PGCOLOR_BUCKET(pg);
	queue = (iszero ? PGFL_ZEROS : PGFL_UNKNOWN);

#ifdef DEBUG
	/* poison the identity pointers to catch later double frees. */
	pg->uobject = (void *)0xdeadbeef;
	pg->uanon = (void *)0xdeadbeef;
#endif

	mutex_spin_enter(&uvm_fpageqlock);
	pg->flags = PG_FREE;

#ifdef DEBUG
	if (iszero)
		uvm_pagezerocheck(pg);
#endif /* DEBUG */


	/* global list */
	pgfl = &uvm.page_free[index].pgfl_buckets[color].pgfl_queues[queue];
	LIST_INSERT_HEAD(pgfl, pg, pageq.list);
	uvmexp.free++;
	if (iszero) {
		CPU_COUNT(CPU_COUNT_ZEROPAGES, 1);
	}

	/* per-cpu list */
	ucpu = curcpu()->ci_data.cpu_uvm;
	/* offset is unused for a free page; stash the freeing cpu in it. */
	pg->offset = (uintptr_t)ucpu;
	pgfl = &ucpu->page_free[index].pgfl_buckets[color].pgfl_queues[queue];
	LIST_INSERT_HEAD(pgfl, pg, listq.list);
	ucpu->pages[queue]++;
	if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) {
		/* un-zeroed pages outnumber zeroed ones: zero when idle. */
		ucpu->page_idle_zero = vm_page_zero_enable;
	}

	mutex_spin_exit(&uvm_fpageqlock);
}
1326
1327 /*
1328 * uvm_page_unbusy: unbusy an array of pages.
1329 *
1330 * => pages must either all belong to the same object, or all belong to anons.
1331 * => if pages are object-owned, object must be locked.
1332 * => if pages are anon-owned, anons must be locked.
1333 * => caller must make sure that anon-owned pages are not PG_RELEASED.
1334 */
1335
1336 void
1337 uvm_page_unbusy(struct vm_page **pgs, int npgs)
1338 {
1339 struct vm_page *pg;
1340 int i;
1341 UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);
1342
1343 for (i = 0; i < npgs; i++) {
1344 pg = pgs[i];
1345 if (pg == NULL || pg == PGO_DONTCARE) {
1346 continue;
1347 }
1348
1349 KASSERT(uvm_page_locked_p(pg));
1350 KASSERT(pg->flags & PG_BUSY);
1351 KASSERT((pg->flags & PG_PAGEOUT) == 0);
1352 if (pg->flags & PG_WANTED) {
1353 /* XXXAD thundering herd problem. */
1354 wakeup(pg);
1355 }
1356 if (pg->flags & PG_RELEASED) {
1357 UVMHIST_LOG(ubchist, "releasing pg %#jx",
1358 (uintptr_t)pg, 0, 0, 0);
1359 KASSERT(pg->uobject != NULL ||
1360 (pg->uanon != NULL && pg->uanon->an_ref > 0));
1361 pg->flags &= ~PG_RELEASED;
1362 uvm_pagefree(pg);
1363 } else {
1364 UVMHIST_LOG(ubchist, "unbusying pg %#jx",
1365 (uintptr_t)pg, 0, 0, 0);
1366 KASSERT((pg->flags & PG_FAKE) == 0);
1367 pg->flags &= ~(PG_WANTED|PG_BUSY);
1368 UVM_PAGE_OWN(pg, NULL);
1369 }
1370 }
1371 }
1372
1373 #if defined(UVM_PAGE_TRKOWN)
1374 /*
1375 * uvm_page_own: set or release page ownership
1376 *
1377 * => this is a debugging function that keeps track of who sets PG_BUSY
1378 * and where they do it. it can be used to track down problems
 *	such as a process setting "PG_BUSY" and never releasing it.
1380 * => page's object [if any] must be locked
1381 * => if "tag" is NULL then we are releasing page ownership
1382 */
1383 void
1384 uvm_page_own(struct vm_page *pg, const char *tag)
1385 {
1386
1387 KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);
1388 KASSERT((pg->flags & PG_WANTED) == 0);
1389 KASSERT(uvm_page_locked_p(pg));
1390
1391 /* gain ownership? */
1392 if (tag) {
1393 KASSERT((pg->flags & PG_BUSY) != 0);
1394 if (pg->owner_tag) {
1395 printf("uvm_page_own: page %p already owned "
1396 "by proc %d [%s]\n", pg,
1397 pg->owner, pg->owner_tag);
1398 panic("uvm_page_own");
1399 }
1400 pg->owner = curproc->p_pid;
1401 pg->lowner = curlwp->l_lid;
1402 pg->owner_tag = tag;
1403 return;
1404 }
1405
1406 /* drop ownership */
1407 KASSERT((pg->flags & PG_BUSY) == 0);
1408 if (pg->owner_tag == NULL) {
1409 printf("uvm_page_own: dropping ownership of an non-owned "
1410 "page (%p)\n", pg);
1411 panic("uvm_page_own");
1412 }
1413 if (!uvmpdpol_pageisqueued_p(pg)) {
1414 KASSERT((pg->uanon == NULL && pg->uobject == NULL) ||
1415 pg->wire_count > 0);
1416 } else {
1417 KASSERT(pg->wire_count == 0);
1418 }
1419 pg->owner_tag = NULL;
1420 }
1421 #endif
1422
1423 /*
1424 * uvm_pageidlezero: zero free pages while the system is idle.
1425 *
1426 * => try to complete one color bucket at a time, to reduce our impact
1427 * on the CPU cache.
1428 * => we loop until we either reach the target or there is a lwp ready
1429 * to run, or MD code detects a reason to break early.
1430 */
void
uvm_pageidlezero(void)
{
	struct vm_page *pg;
	struct pgfreelist *pgfl, *gpgfl;
	struct uvm_cpu *ucpu;
	int free_list, firstbucket, nextbucket;
	bool lcont = false;

	ucpu = curcpu()->ci_data.cpu_uvm;
	if (!ucpu->page_idle_zero ||
	    ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {
		/* zeroing disabled, or too few un-zeroed pages to bother. */
		ucpu->page_idle_zero = false;
		return;
	}
	if (!mutex_tryenter(&uvm_fpageqlock)) {
		/* Contention: let other CPUs use the lock. */
		return;
	}
	firstbucket = ucpu->page_free_nextcolor;
	nextbucket = firstbucket;
	do {
		for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
			if (sched_curcpu_runnable_p()) {
				goto quit;
			}
			pgfl = &ucpu->page_free[free_list];
			gpgfl = &uvm.page_free[free_list];
			while ((pg = LIST_FIRST(&pgfl->pgfl_buckets[
			    nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) {
				if (lcont || sched_curcpu_runnable_p()) {
					goto quit;
				}
				/* detach from both free lists while zeroing. */
				LIST_REMOVE(pg, pageq.list); /* global list */
				LIST_REMOVE(pg, listq.list); /* per-cpu list */
				ucpu->pages[PGFL_UNKNOWN]--;
				uvmexp.free--;
				KASSERT(pg->flags == PG_FREE);
				pg->flags = 0;
				/* drop the lock across the (slow) zeroing. */
				mutex_spin_exit(&uvm_fpageqlock);
#ifdef PMAP_PAGEIDLEZERO
				if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) {

					/*
					 * The machine-dependent code detected
					 * some reason for us to abort zeroing
					 * pages, probably because there is a
					 * process now ready to run.
					 */

					mutex_spin_enter(&uvm_fpageqlock);
					/* put the page back, still unzeroed. */
					pg->flags = PG_FREE;
					LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[
					    nextbucket].pgfl_queues[
					    PGFL_UNKNOWN], pg, pageq.list);
					LIST_INSERT_HEAD(&pgfl->pgfl_buckets[
					    nextbucket].pgfl_queues[
					    PGFL_UNKNOWN], pg, listq.list);
					ucpu->pages[PGFL_UNKNOWN]++;
					uvmexp.free++;
					CPU_COUNT(CPU_COUNT_ZEROABORTS, 1);
					goto quit;
				}
#else
				pmap_zero_page(VM_PAGE_TO_PHYS(pg));
#endif /* PMAP_PAGEIDLEZERO */
				/*
				 * re-take the lock; if it was contended,
				 * remember that so we stop after returning
				 * this page to the zero queue.
				 */
				if (!mutex_tryenter(&uvm_fpageqlock)) {
					lcont = true;
					mutex_spin_enter(&uvm_fpageqlock);
				} else {
					lcont = false;
				}
				pg->flags = PG_FREE | PG_ZERO;
				LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[
				    nextbucket].pgfl_queues[PGFL_ZEROS],
				    pg, pageq.list);
				LIST_INSERT_HEAD(&pgfl->pgfl_buckets[
				    nextbucket].pgfl_queues[PGFL_ZEROS],
				    pg, listq.list);
				ucpu->pages[PGFL_ZEROS]++;
				uvmexp.free++;
				CPU_COUNT(CPU_COUNT_ZEROPAGES, 1);
			}
		}
		if (ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {
			break;
		}
		nextbucket = (nextbucket + 1) & uvmexp.colormask;
	} while (nextbucket != firstbucket);
	ucpu->page_idle_zero = false;
 quit:
	mutex_spin_exit(&uvm_fpageqlock);
}
1524
1525 /*
1526 * uvm_pagelookup: look up a page
1527 *
1528 * => caller should lock object to keep someone from pulling the page
1529 * out from under it
1530 */
1531
1532 struct vm_page *
1533 uvm_pagelookup(struct uvm_object *obj, voff_t off)
1534 {
1535 struct vm_page *pg;
1536
1537 /* No - used from DDB. KASSERT(mutex_owned(obj->vmobjlock)); */
1538
1539 pg = radix_tree_lookup_node(&obj->uo_pages, off >> PAGE_SHIFT);
1540
1541 KASSERT(pg == NULL || obj->uo_npages != 0);
1542 KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
1543 (pg->flags & PG_BUSY) != 0);
1544 return pg;
1545 }
1546
1547 /*
1548 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
1549 *
1550 * => caller must lock objects
1551 */
1552
void
uvm_pagewire(struct vm_page *pg)
{

	KASSERT(uvm_page_locked_p(pg));
#if defined(READAHEAD_STATS)
	if ((pg->flags & PG_READAHEAD) != 0) {
		/* a read-ahead page is being referenced: count the hit. */
		uvm_ra_hit.ev_count++;
		pg->flags &= ~PG_READAHEAD;
	}
#endif /* defined(READAHEAD_STATS) */
	if (pg->wire_count == 0) {
		/*
		 * first wiring: remove from the paging queues and account
		 * for the page in the global wired count.
		 */
		uvm_pagedequeue(pg);
		atomic_inc_uint(&uvmexp.wired);
	}
	/* the wire count itself is updated under the page interlock. */
	mutex_enter(&pg->interlock);
	pg->wire_count++;
	mutex_exit(&pg->interlock);
	KASSERT(pg->wire_count > 0);	/* detect wraparound */
}
1573
1574 /*
1575 * uvm_pageunwire: unwire the page.
1576 *
1577 * => activate if wire count goes to zero.
1578 * => caller must lock objects
1579 */
1580
void
uvm_pageunwire(struct vm_page *pg)
{

	KASSERT(uvm_page_locked_p(pg));
	KASSERT(pg->wire_count != 0);
	KASSERT(!uvmpdpol_pageisqueued_p(pg));
	/* the wire count itself is updated under the page interlock. */
	mutex_enter(&pg->interlock);
	pg->wire_count--;
	mutex_exit(&pg->interlock);
	if (pg->wire_count == 0) {
		/* last unwiring: hand the page back to the pagedaemon. */
		uvm_pageactivate(pg);
		KASSERT(uvmexp.wired != 0);
		atomic_dec_uint(&uvmexp.wired);
	}
}
1597
1598 /*
1599 * uvm_pagedeactivate: deactivate page
1600 *
1601 * => caller must lock objects
1602 * => caller must check to make sure page is not wired
1603 * => object that page belongs to must be locked (so we can adjust pg->flags)
1604 * => caller must clear the reference on the page before calling
1605 */
1606
1607 void
1608 uvm_pagedeactivate(struct vm_page *pg)
1609 {
1610
1611 KASSERT(uvm_page_locked_p(pg));
1612 if (pg->wire_count == 0) {
1613 KASSERT(uvmpdpol_pageisqueued_p(pg));
1614 uvmpdpol_pagedeactivate(pg);
1615 }
1616 }
1617
1618 /*
1619 * uvm_pageactivate: activate page
1620 *
1621 * => caller must lock objects
1622 */
1623
1624 void
1625 uvm_pageactivate(struct vm_page *pg)
1626 {
1627
1628 KASSERT(uvm_page_locked_p(pg));
1629 #if defined(READAHEAD_STATS)
1630 if ((pg->flags & PG_READAHEAD) != 0) {
1631 uvm_ra_hit.ev_count++;
1632 pg->flags &= ~PG_READAHEAD;
1633 }
1634 #endif /* defined(READAHEAD_STATS) */
1635 if (pg->wire_count == 0) {
1636 uvmpdpol_pageactivate(pg);
1637 }
1638 }
1639
1640 /*
1641 * uvm_pagedequeue: remove a page from any paging queue
1642 *
1643 * => caller must lock objects
1644 */
void
uvm_pagedequeue(struct vm_page *pg)
{

	KASSERT(uvm_page_locked_p(pg));
	if (!uvmpdpol_pageisqueued_p(pg)) {
		/* already off the queues: nothing to do. */
		return;
	}
	uvmpdpol_pagedequeue(pg);
}
1654
1655 /*
1656 * uvm_pageenqueue: add a page to a paging queue without activating.
1657 * used where a page is not really demanded (yet). eg. read-ahead
1658 *
1659 * => caller must lock objects
1660 */
1661 void
1662 uvm_pageenqueue(struct vm_page *pg)
1663 {
1664
1665 KASSERT(uvm_page_locked_p(pg));
1666 if (pg->wire_count == 0 && !uvmpdpol_pageisqueued_p(pg)) {
1667 uvmpdpol_pageenqueue(pg);
1668 }
1669 }
1670
1671 /*
1672 * uvm_pagezero: zero fill a page
1673 *
1674 * => if page is part of an object then the object should be locked
1675 * to protect pg->flags.
1676 */
1677
void
uvm_pagezero(struct vm_page *pg)
{
	/* the zeroed contents no longer match backing store. */
	pg->flags &= ~PG_CLEAN;
	pmap_zero_page(VM_PAGE_TO_PHYS(pg));
}
1684
1685 /*
1686 * uvm_pagecopy: copy a page
1687 *
1688 * => if page is part of an object then the object should be locked
1689 * to protect pg->flags.
1690 */
1691
void
uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
{

	/* dst's new contents no longer match its backing store. */
	dst->flags &= ~PG_CLEAN;
	pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
}
1699
1700 /*
 * uvm_pageismanaged: test whether a page (specified by PA) is managed.
1702 */
1703
1704 bool
1705 uvm_pageismanaged(paddr_t pa)
1706 {
1707
1708 return (uvm_physseg_find(atop(pa), NULL) != UVM_PHYSSEG_TYPE_INVALID);
1709 }
1710
1711 /*
1712 * uvm_page_lookup_freelist: look up the free list for the specified page
1713 */
1714
1715 int
1716 uvm_page_lookup_freelist(struct vm_page *pg)
1717 {
1718 uvm_physseg_t upm;
1719
1720 upm = uvm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
1721 KASSERT(upm != UVM_PHYSSEG_TYPE_INVALID);
1722 return uvm_physseg_get_free_list(upm);
1723 }
1724
1725 /*
1726 * uvm_page_locked_p: return true if object associated with page is
1727 * locked. this is a weak check for runtime assertions only.
1728 */
1729
1730 bool
1731 uvm_page_locked_p(struct vm_page *pg)
1732 {
1733
1734 if (pg->uobject != NULL) {
1735 return mutex_owned(pg->uobject->vmobjlock);
1736 }
1737 if (pg->uanon != NULL) {
1738 return mutex_owned(pg->uanon->an_lock);
1739 }
1740 return true;
1741 }
1742
1743 #ifdef PMAP_DIRECT
1744 /*
1745 * Call pmap to translate physical address into a virtual and to run a callback
1746 * for it. Used to avoid actually mapping the pages, pmap most likely uses direct map
1747 * or equivalent.
1748 */
int
uvm_direct_process(struct vm_page **pgs, u_int npages, voff_t off, vsize_t len,
    int (*process)(void *, size_t, void *), void *arg)
{
	int error = 0;
	paddr_t pa;
	size_t todo;
	voff_t pgoff = (off & PAGE_MASK);	/* offset into the first page */
	struct vm_page *pg;

	KASSERT(npages > 0 && len > 0);

	for (int i = 0; i < npages; i++) {
		pg = pgs[i];

		KASSERT(len > 0);

		/*
		 * Caller is responsible for ensuring all the pages are
		 * available.
		 */
		KASSERT(pg != NULL && pg != PGO_DONTCARE);

		pa = VM_PAGE_TO_PHYS(pg);
		/* clamp to the portion of this page the request covers. */
		todo = MIN(len, PAGE_SIZE - pgoff);

		error = pmap_direct_process(pa, pgoff, todo, process, arg);
		if (error)
			break;

		/* only the first page can start at a non-zero offset. */
		pgoff = 0;
		len -= todo;
	}

	KASSERTMSG(error != 0 || len == 0, "len %lu != 0 for non-error", len);
	return error;
}
1786 #endif /* PMAP_DIRECT */
1787
1788 #if defined(DDB) || defined(DEBUGPRINT)
1789
1790 /*
1791 * uvm_page_printit: actually print the page
1792 */
1793
1794 static const char page_flagbits[] = UVM_PGFLAGBITS;
1795
void
uvm_page_printit(struct vm_page *pg, bool full,
    void (*pr)(const char *, ...))
{
	struct vm_page *tpg;
	struct uvm_object *uobj;
	struct pgflist *pgl;
	char pgbuf[128];

	(*pr)("PAGE %p:\n", pg);
	snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags);
	(*pr)(" flags=%s, pqflags=%x, wire_count=%d, pa=0x%lx\n",
	    pgbuf, pg->pqflags, pg->wire_count, (long)VM_PAGE_TO_PHYS(pg));
	(*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n",
	    pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count);
#if defined(UVM_PAGE_TRKOWN)
	if (pg->flags & PG_BUSY)
		(*pr)(" owning process = %d, tag=%s\n",
		    pg->owner, pg->owner_tag);
	else
		(*pr)(" page not busy, no owner\n");
#else
	(*pr)(" [page ownership tracking disabled]\n");
#endif

	if (!full)
		return;

	/* cross-verify object/anon */
	if ((pg->flags & PG_FREE) == 0) {
		if (pg->flags & PG_ANON) {
			/* anon-owned: the anon must point back at us. */
			if (pg->uanon == NULL || pg->uanon->an_page != pg)
			    (*pr)("  >>> ANON DOES NOT POINT HERE <<< (%p)\n",
				(pg->uanon) ? pg->uanon->an_page : NULL);
			else
				(*pr)("  anon backpointer is OK\n");
		} else {
			/* object-owned: the page must be findable by lookup. */
			uobj = pg->uobject;
			if (uobj) {
				(*pr)("  checking object list\n");
				tpg = uvm_pagelookup(uobj, pg->offset);
				if (tpg)
					(*pr)("  page found on object list\n");
				else
			(*pr)("  >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n");
			}
		}
	}

	/* cross-verify page queue */
	if (pg->flags & PG_FREE) {
		/* locate the exact free bucket this page should be on. */
		int fl = uvm_page_lookup_freelist(pg);
		int color = VM_PGCOLOR_BUCKET(pg);
		pgl = &uvm.page_free[fl].pgfl_buckets[color].pgfl_queues[
		    ((pg)->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN];
	} else {
		pgl = NULL;
	}

	if (pgl) {
		(*pr)("  checking pageq list\n");
		LIST_FOREACH(tpg, pgl, pageq.list) {
			if (tpg == pg) {
				break;
			}
		}
		if (tpg)
			(*pr)("  page found on pageq list\n");
		else
			(*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
	}
}
1868
1869 /*
1870 * uvm_page_printall - print a summary of all managed pages
1871 */
1872
void
uvm_page_printall(void (*pr)(const char *, ...))
{
	uvm_physseg_t i;
	paddr_t pfn;
	struct vm_page *pg;

	(*pr)("%18s %4s %4s %18s %18s"
#ifdef UVM_PAGE_TRKOWN
	    "             OWNER"
#endif
	    "\n", "PAGE", "FLAG", "PQ", "UOBJECT", "UANON");
	/* walk every physical segment, and every page frame within it. */
	for (i = uvm_physseg_get_first();
	     uvm_physseg_valid_p(i);
	     i = uvm_physseg_get_next(i)) {
		for (pfn = uvm_physseg_get_start(i);
		     pfn < uvm_physseg_get_end(i);
		     pfn++) {
			pg = PHYS_TO_VM_PAGE(ptoa(pfn));

			(*pr)("%18p %04x %08x %18p %18p",
			    pg, pg->flags, pg->pqflags, pg->uobject,
			    pg->uanon);
#ifdef UVM_PAGE_TRKOWN
			if (pg->flags & PG_BUSY)
				(*pr)(" %d [%s]", pg->owner, pg->owner_tag);
#endif
			(*pr)("\n");
		}
	}
}
1904
1905 #endif /* DDB || DEBUGPRINT */
1906