uvm_page.c revision 1.139 1 /* $NetBSD: uvm_page.c,v 1.139 2008/07/02 17:47:53 ad Exp $ */
2
3 /*
4 * Copyright (c) 1997 Charles D. Cranor and Washington University.
5 * Copyright (c) 1991, 1993, The Regents of the University of California.
6 *
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * The Mach Operating System project at Carnegie-Mellon University.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by Charles D. Cranor,
23 * Washington University, the University of California, Berkeley and
24 * its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)vm_page.c 8.3 (Berkeley) 3/21/94
42 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
43 *
44 *
45 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
46 * All rights reserved.
47 *
48 * Permission to use, copy, modify and distribute this software and
49 * its documentation is hereby granted, provided that both the copyright
50 * notice and this permission notice appear in all copies of the
51 * software, derivative works or modified versions, and any portions
52 * thereof, and that both notices appear in supporting documentation.
53 *
54 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
55 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
56 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
57 *
58 * Carnegie Mellon requests users of this software to return to
59 *
60 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
61 * School of Computer Science
62 * Carnegie Mellon University
63 * Pittsburgh PA 15213-3890
64 *
65 * any improvements or extensions that they make and grant Carnegie the
66 * rights to redistribute these changes.
67 */
68
69 /*
70 * uvm_page.c: page ops.
71 */
72
73 #include <sys/cdefs.h>
74 __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.139 2008/07/02 17:47:53 ad Exp $");
75
76 #include "opt_uvmhist.h"
77 #include "opt_readahead.h"
78
79 #include <sys/param.h>
80 #include <sys/systm.h>
81 #include <sys/malloc.h>
82 #include <sys/sched.h>
83 #include <sys/kernel.h>
84 #include <sys/vnode.h>
85 #include <sys/proc.h>
86 #include <sys/atomic.h>
87 #include <sys/cpu.h>
88
89 #include <uvm/uvm.h>
90 #include <uvm/uvm_pdpolicy.h>
91
92 /*
93 * global vars... XXXCDC: move to uvm. structure.
94 */
95
96 /*
97 * physical memory config is stored in vm_physmem.
98 */
99
100 struct vm_physseg vm_physmem[VM_PHYSSEG_MAX]; /* XXXCDC: uvm.physmem */
101 int vm_nphysseg = 0; /* XXXCDC: uvm.nphysseg */
102
103 /*
104 * Some supported CPUs in a given architecture don't support all
105 * of the things necessary to do idle page zero'ing efficiently.
106 * We therefore provide a way to disable it from machdep code here.
107 */
108 /*
109 * XXX disabled until we can find a way to do this without causing
110 * problems for either CPU caches or DMA latency.
111 */
112 bool vm_page_zero_enable = false;
113
114 /*
115 * local variables
116 */
117
118 /*
119 * these variables record the values returned by vm_page_bootstrap,
120 * for debugging purposes. The implementation of uvm_pageboot_alloc
121 * and pmap_startup here also uses them internally.
122 */
123
124 static vaddr_t virtual_space_start;
125 static vaddr_t virtual_space_end;
126
127 /*
128 * we allocate an initial number of page colors in uvm_page_init(),
129 * and remember them. We may re-color pages as cache sizes are
130 * discovered during the autoconfiguration phase. But we can never
131 * free the initial set of buckets, since they are allocated using
132 * uvm_pageboot_alloc().
133 */
134
135 static bool have_recolored_pages /* = false */;
136
137 MALLOC_DEFINE(M_VMPAGE, "VM page", "VM page");
138
139 #ifdef DEBUG
140 vaddr_t uvm_zerocheckkva;
141 #endif /* DEBUG */
142
143 /*
144 * local prototypes
145 */
146
147 static void uvm_pageinsert(struct vm_page *);
148 static void uvm_pageremove(struct vm_page *);
149
150 /*
151 * per-object tree of pages
152 */
153
154 static signed int
155 uvm_page_compare_nodes(const struct rb_node *n1, const struct rb_node *n2)
156 {
157 const struct vm_page *pg1 = (const void *)n1;
158 const struct vm_page *pg2 = (const void *)n2;
159 const voff_t a = pg1->offset;
160 const voff_t b = pg2->offset;
161
162 if (a < b)
163 return 1;
164 if (a > b)
165 return -1;
166 return 0;
167 }
168
169 static signed int
170 uvm_page_compare_key(const struct rb_node *n, const void *key)
171 {
172 const struct vm_page *pg = (const void *)n;
173 const voff_t a = pg->offset;
174 const voff_t b = *(const voff_t *)key;
175
176 if (a < b)
177 return 1;
178 if (a > b)
179 return -1;
180 return 0;
181 }
182
183 const struct rb_tree_ops uvm_page_tree_ops = {
184 .rbto_compare_nodes = uvm_page_compare_nodes,
185 .rbto_compare_key = uvm_page_compare_key,
186 };
187
188 /*
189 * inline functions
190 */
191
192 /*
193 * uvm_pageinsert: insert a page in the object.
194 *
195 * => caller must lock object
196 * => caller must lock page queues
 * => caller should have already set pg's object and offset pointers
198 * and bumped the version counter
199 */
200
201 static inline void
202 uvm_pageinsert_list(struct uvm_object *uobj, struct vm_page *pg,
203 struct vm_page *where)
204 {
205
206 KASSERT(uobj == pg->uobject);
207 KASSERT(mutex_owned(&uobj->vmobjlock));
208 KASSERT((pg->flags & PG_TABLED) == 0);
209 KASSERT(where == NULL || (where->flags & PG_TABLED));
210 KASSERT(where == NULL || (where->uobject == uobj));
211
212 if (UVM_OBJ_IS_VNODE(uobj)) {
213 if (uobj->uo_npages == 0) {
214 struct vnode *vp = (struct vnode *)uobj;
215
216 vholdl(vp);
217 }
218 if (UVM_OBJ_IS_VTEXT(uobj)) {
219 atomic_inc_uint(&uvmexp.execpages);
220 } else {
221 atomic_inc_uint(&uvmexp.filepages);
222 }
223 } else if (UVM_OBJ_IS_AOBJ(uobj)) {
224 atomic_inc_uint(&uvmexp.anonpages);
225 }
226
227 if (where)
228 TAILQ_INSERT_AFTER(&uobj->memq, where, pg, listq.queue);
229 else
230 TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
231 pg->flags |= PG_TABLED;
232 uobj->uo_npages++;
233 }
234
235
236 static inline void
237 uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg)
238 {
239 bool success;
240
241 KASSERT(uobj == pg->uobject);
242 success = rb_tree_insert_node(&uobj->rb_tree, &pg->rb_node);
243 KASSERT(success);
244 }
245
246 static inline void
247 uvm_pageinsert(struct vm_page *pg)
248 {
249 struct uvm_object *uobj = pg->uobject;
250
251 uvm_pageinsert_tree(uobj, pg);
252 uvm_pageinsert_list(uobj, pg, NULL);
253 }
254
255 /*
 * uvm_pageremove: remove page from object.
257 *
258 * => caller must lock object
259 * => caller must lock page queues
260 */
261
262 static inline void
263 uvm_pageremove_list(struct uvm_object *uobj, struct vm_page *pg)
264 {
265
266 KASSERT(uobj == pg->uobject);
267 KASSERT(mutex_owned(&uobj->vmobjlock));
268 KASSERT(pg->flags & PG_TABLED);
269
270 if (UVM_OBJ_IS_VNODE(uobj)) {
271 if (uobj->uo_npages == 1) {
272 struct vnode *vp = (struct vnode *)uobj;
273
274 holdrelel(vp);
275 }
276 if (UVM_OBJ_IS_VTEXT(uobj)) {
277 atomic_dec_uint(&uvmexp.execpages);
278 } else {
279 atomic_dec_uint(&uvmexp.filepages);
280 }
281 } else if (UVM_OBJ_IS_AOBJ(uobj)) {
282 atomic_dec_uint(&uvmexp.anonpages);
283 }
284
285 /* object should be locked */
286 uobj->uo_npages--;
287 TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
288 pg->flags &= ~PG_TABLED;
289 pg->uobject = NULL;
290 }
291
292 static inline void
293 uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg)
294 {
295
296 KASSERT(uobj == pg->uobject);
297 rb_tree_remove_node(&uobj->rb_tree, &pg->rb_node);
298 }
299
300 static inline void
301 uvm_pageremove(struct vm_page *pg)
302 {
303 struct uvm_object *uobj = pg->uobject;
304
305 uvm_pageremove_tree(uobj, pg);
306 uvm_pageremove_list(uobj, pg);
307 }
308
309 static void
310 uvm_page_init_buckets(struct pgfreelist *pgfl)
311 {
312 int color, i;
313
314 for (color = 0; color < uvmexp.ncolors; color++) {
315 for (i = 0; i < PGFL_NQUEUES; i++) {
316 LIST_INIT(&pgfl->pgfl_buckets[color].pgfl_queues[i]);
317 }
318 }
319 }
320
321 /*
322 * uvm_page_init: init the page system. called from uvm_init().
323 *
324 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
325 */
326
327 void
328 uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
329 {
330 vsize_t freepages, pagecount, bucketcount, n;
331 struct pgflbucket *bucketarray, *cpuarray;
332 struct vm_page *pagearray;
333 int lcv;
334 u_int i;
335 paddr_t paddr;
336
337 KASSERT(ncpu <= 1);
338 CTASSERT(sizeof(pagearray->offset) >= sizeof(struct uvm_cpu *));
339
340 /*
341 * init the page queues and page queue locks, except the free
342 * list; we allocate that later (with the initial vm_page
343 * structures).
344 */
345
346 curcpu()->ci_data.cpu_uvm = &uvm.cpus[0];
347 uvmpdpol_init();
348 mutex_init(&uvm_pageqlock, MUTEX_DRIVER, IPL_NONE);
349 mutex_init(&uvm_fpageqlock, MUTEX_DRIVER, IPL_VM);
350
351 /*
352 * allocate vm_page structures.
353 */
354
355 /*
356 * sanity check:
357 * before calling this function the MD code is expected to register
358 * some free RAM with the uvm_page_physload() function. our job
359 * now is to allocate vm_page structures for this memory.
360 */
361
362 if (vm_nphysseg == 0)
363 panic("uvm_page_bootstrap: no memory pre-allocated");
364
365 /*
366 * first calculate the number of free pages...
367 *
368 * note that we use start/end rather than avail_start/avail_end.
369 * this allows us to allocate extra vm_page structures in case we
370 * want to return some memory to the pool after booting.
371 */
372
373 freepages = 0;
374 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
375 freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);
376
377 /*
378 * Let MD code initialize the number of colors, or default
379 * to 1 color if MD code doesn't care.
380 */
381 if (uvmexp.ncolors == 0)
382 uvmexp.ncolors = 1;
383 uvmexp.colormask = uvmexp.ncolors - 1;
384
385 /*
386 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
387 * use. for each page of memory we use we need a vm_page structure.
388 * thus, the total number of pages we can use is the total size of
389 * the memory divided by the PAGE_SIZE plus the size of the vm_page
390 * structure. we add one to freepages as a fudge factor to avoid
391 * truncation errors (since we can only allocate in terms of whole
392 * pages).
393 */
394
395 bucketcount = uvmexp.ncolors * VM_NFREELIST;
396 pagecount = ((freepages + 1) << PAGE_SHIFT) /
397 (PAGE_SIZE + sizeof(struct vm_page));
398
399 bucketarray = (void *)uvm_pageboot_alloc((bucketcount *
400 sizeof(struct pgflbucket) * 2) + (pagecount *
401 sizeof(struct vm_page)));
402 cpuarray = bucketarray + bucketcount;
403 pagearray = (struct vm_page *)(bucketarray + bucketcount * 2);
404
405 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
406 uvm.page_free[lcv].pgfl_buckets =
407 (bucketarray + (lcv * uvmexp.ncolors));
408 uvm_page_init_buckets(&uvm.page_free[lcv]);
409 uvm.cpus[0].page_free[lcv].pgfl_buckets =
410 (cpuarray + (lcv * uvmexp.ncolors));
411 uvm_page_init_buckets(&uvm.cpus[0].page_free[lcv]);
412 }
413 memset(pagearray, 0, pagecount * sizeof(struct vm_page));
414
415 /*
416 * init the vm_page structures and put them in the correct place.
417 */
418
419 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
420 n = vm_physmem[lcv].end - vm_physmem[lcv].start;
421
422 /* set up page array pointers */
423 vm_physmem[lcv].pgs = pagearray;
424 pagearray += n;
425 pagecount -= n;
426 vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);
427
428 /* init and free vm_pages (we've already zeroed them) */
429 paddr = ptoa(vm_physmem[lcv].start);
430 for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
431 vm_physmem[lcv].pgs[i].phys_addr = paddr;
432 #ifdef __HAVE_VM_PAGE_MD
433 VM_MDPAGE_INIT(&vm_physmem[lcv].pgs[i]);
434 #endif
435 if (atop(paddr) >= vm_physmem[lcv].avail_start &&
436 atop(paddr) <= vm_physmem[lcv].avail_end) {
437 uvmexp.npages++;
438 /* add page to free pool */
439 uvm_pagefree(&vm_physmem[lcv].pgs[i]);
440 }
441 }
442 }
443
444 /*
445 * pass up the values of virtual_space_start and
446 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
447 * layers of the VM.
448 */
449
450 *kvm_startp = round_page(virtual_space_start);
451 *kvm_endp = trunc_page(virtual_space_end);
452 #ifdef DEBUG
453 /*
454 * steal kva for uvm_pagezerocheck().
455 */
456 uvm_zerocheckkva = *kvm_startp;
457 *kvm_startp += PAGE_SIZE;
458 #endif /* DEBUG */
459
460 /*
461 * init various thresholds.
462 */
463
464 uvmexp.reserve_pagedaemon = 1;
465 uvmexp.reserve_kernel = 5;
466
467 /*
468 * determine if we should zero pages in the idle loop.
469 */
470
471 uvm.cpus[0].page_idle_zero = vm_page_zero_enable;
472
473 /*
474 * done!
475 */
476
477 uvm.page_init_done = true;
478 }
479
480 /*
481 * uvm_setpagesize: set the page size
482 *
483 * => sets page_shift and page_mask from uvmexp.pagesize.
484 */
485
486 void
487 uvm_setpagesize(void)
488 {
489
490 /*
491 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE
492 * to be a constant (indicated by being a non-zero value).
493 */
494 if (uvmexp.pagesize == 0) {
495 if (PAGE_SIZE == 0)
496 panic("uvm_setpagesize: uvmexp.pagesize not set");
497 uvmexp.pagesize = PAGE_SIZE;
498 }
499 uvmexp.pagemask = uvmexp.pagesize - 1;
500 if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
501 panic("uvm_setpagesize: page size not a power of two");
502 for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
503 if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
504 break;
505 }
506
507 /*
508 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
509 */
510
511 vaddr_t
512 uvm_pageboot_alloc(vsize_t size)
513 {
514 static bool initialized = false;
515 vaddr_t addr;
516 #if !defined(PMAP_STEAL_MEMORY)
517 vaddr_t vaddr;
518 paddr_t paddr;
519 #endif
520
521 /*
522 * on first call to this function, initialize ourselves.
523 */
524 if (initialized == false) {
525 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
526
527 /* round it the way we like it */
528 virtual_space_start = round_page(virtual_space_start);
529 virtual_space_end = trunc_page(virtual_space_end);
530
531 initialized = true;
532 }
533
534 /* round to page size */
535 size = round_page(size);
536
537 #if defined(PMAP_STEAL_MEMORY)
538
539 /*
540 * defer bootstrap allocation to MD code (it may want to allocate
541 * from a direct-mapped segment). pmap_steal_memory should adjust
542 * virtual_space_start/virtual_space_end if necessary.
543 */
544
545 addr = pmap_steal_memory(size, &virtual_space_start,
546 &virtual_space_end);
547
548 return(addr);
549
550 #else /* !PMAP_STEAL_MEMORY */
551
552 /*
553 * allocate virtual memory for this request
554 */
555 if (virtual_space_start == virtual_space_end ||
556 (virtual_space_end - virtual_space_start) < size)
557 panic("uvm_pageboot_alloc: out of virtual space");
558
559 addr = virtual_space_start;
560
561 #ifdef PMAP_GROWKERNEL
562 /*
563 * If the kernel pmap can't map the requested space,
564 * then allocate more resources for it.
565 */
566 if (uvm_maxkaddr < (addr + size)) {
567 uvm_maxkaddr = pmap_growkernel(addr + size);
568 if (uvm_maxkaddr < (addr + size))
569 panic("uvm_pageboot_alloc: pmap_growkernel() failed");
570 }
571 #endif
572
573 virtual_space_start += size;
574
575 /*
576 * allocate and mapin physical pages to back new virtual pages
577 */
578
579 for (vaddr = round_page(addr) ; vaddr < addr + size ;
580 vaddr += PAGE_SIZE) {
581
582 if (!uvm_page_physget(&paddr))
583 panic("uvm_pageboot_alloc: out of memory");
584
585 /*
586 * Note this memory is no longer managed, so using
587 * pmap_kenter is safe.
588 */
589 pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
590 }
591 pmap_update(pmap_kernel());
592 return(addr);
593 #endif /* PMAP_STEAL_MEMORY */
594 }
595
596 #if !defined(PMAP_STEAL_MEMORY)
597 /*
598 * uvm_page_physget: "steal" one page from the vm_physmem structure.
599 *
600 * => attempt to allocate it off the end of a segment in which the "avail"
601 * values match the start/end values. if we can't do that, then we
602 * will advance both values (making them equal, and removing some
603 * vm_page structures from the non-avail area).
604 * => return false if out of memory.
605 */
606
607 /* subroutine: try to allocate from memory chunks on the specified freelist */
608 static bool uvm_page_physget_freelist(paddr_t *, int);
609
610 static bool
611 uvm_page_physget_freelist(paddr_t *paddrp, int freelist)
612 {
613 int lcv, x;
614
615 /* pass 1: try allocating from a matching end */
616 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
617 for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
618 #else
619 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
620 #endif
621 {
622
623 if (uvm.page_init_done == true)
624 panic("uvm_page_physget: called _after_ bootstrap");
625
626 if (vm_physmem[lcv].free_list != freelist)
627 continue;
628
629 /* try from front */
630 if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
631 vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
632 *paddrp = ptoa(vm_physmem[lcv].avail_start);
633 vm_physmem[lcv].avail_start++;
634 vm_physmem[lcv].start++;
635 /* nothing left? nuke it */
636 if (vm_physmem[lcv].avail_start ==
637 vm_physmem[lcv].end) {
638 if (vm_nphysseg == 1)
639 panic("uvm_page_physget: out of memory!");
640 vm_nphysseg--;
641 for (x = lcv ; x < vm_nphysseg ; x++)
642 /* structure copy */
643 vm_physmem[x] = vm_physmem[x+1];
644 }
645 return (true);
646 }
647
648 /* try from rear */
649 if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
650 vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
651 *paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
652 vm_physmem[lcv].avail_end--;
653 vm_physmem[lcv].end--;
654 /* nothing left? nuke it */
655 if (vm_physmem[lcv].avail_end ==
656 vm_physmem[lcv].start) {
657 if (vm_nphysseg == 1)
658 panic("uvm_page_physget: out of memory!");
659 vm_nphysseg--;
660 for (x = lcv ; x < vm_nphysseg ; x++)
661 /* structure copy */
662 vm_physmem[x] = vm_physmem[x+1];
663 }
664 return (true);
665 }
666 }
667
668 /* pass2: forget about matching ends, just allocate something */
669 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
670 for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
671 #else
672 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
673 #endif
674 {
675
676 /* any room in this bank? */
677 if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
678 continue; /* nope */
679
680 *paddrp = ptoa(vm_physmem[lcv].avail_start);
681 vm_physmem[lcv].avail_start++;
682 /* truncate! */
683 vm_physmem[lcv].start = vm_physmem[lcv].avail_start;
684
685 /* nothing left? nuke it */
686 if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
687 if (vm_nphysseg == 1)
688 panic("uvm_page_physget: out of memory!");
689 vm_nphysseg--;
690 for (x = lcv ; x < vm_nphysseg ; x++)
691 /* structure copy */
692 vm_physmem[x] = vm_physmem[x+1];
693 }
694 return (true);
695 }
696
697 return (false); /* whoops! */
698 }
699
700 bool
701 uvm_page_physget(paddr_t *paddrp)
702 {
703 int i;
704
705 /* try in the order of freelist preference */
706 for (i = 0; i < VM_NFREELIST; i++)
707 if (uvm_page_physget_freelist(paddrp, i) == true)
708 return (true);
709 return (false);
710 }
711 #endif /* PMAP_STEAL_MEMORY */
712
713 /*
714 * uvm_page_physload: load physical memory into VM system
715 *
716 * => all args are PFs
717 * => all pages in start/end get vm_page structures
718 * => areas marked by avail_start/avail_end get added to the free page pool
719 * => we are limited to VM_PHYSSEG_MAX physical memory segments
720 */
721
722 void
723 uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
724 paddr_t avail_end, int free_list)
725 {
726 int preload, lcv;
727 psize_t npages;
728 struct vm_page *pgs;
729 struct vm_physseg *ps;
730
731 if (uvmexp.pagesize == 0)
732 panic("uvm_page_physload: page size not set!");
733 if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
734 panic("uvm_page_physload: bad free list %d", free_list);
735 if (start >= end)
736 panic("uvm_page_physload: start >= end");
737
738 /*
739 * do we have room?
740 */
741
742 if (vm_nphysseg == VM_PHYSSEG_MAX) {
743 printf("uvm_page_physload: unable to load physical memory "
744 "segment\n");
745 printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
746 VM_PHYSSEG_MAX, (long long)start, (long long)end);
747 printf("\tincrease VM_PHYSSEG_MAX\n");
748 return;
749 }
750
751 /*
752 * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
753 * called yet, so malloc is not available).
754 */
755
756 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
757 if (vm_physmem[lcv].pgs)
758 break;
759 }
760 preload = (lcv == vm_nphysseg);
761
762 /*
763 * if VM is already running, attempt to malloc() vm_page structures
764 */
765
766 if (!preload) {
767 #if defined(VM_PHYSSEG_NOADD)
768 panic("uvm_page_physload: tried to add RAM after vm_mem_init");
769 #else
770 /* XXXCDC: need some sort of lockout for this case */
771 paddr_t paddr;
772 npages = end - start; /* # of pages */
773 pgs = malloc(sizeof(struct vm_page) * npages,
774 M_VMPAGE, M_NOWAIT);
775 if (pgs == NULL) {
776 printf("uvm_page_physload: can not malloc vm_page "
777 "structs for segment\n");
778 printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
779 return;
780 }
781 /* zero data, init phys_addr and free_list, and free pages */
782 memset(pgs, 0, sizeof(struct vm_page) * npages);
783 for (lcv = 0, paddr = ptoa(start) ;
784 lcv < npages ; lcv++, paddr += PAGE_SIZE) {
785 pgs[lcv].phys_addr = paddr;
786 pgs[lcv].free_list = free_list;
787 if (atop(paddr) >= avail_start &&
788 atop(paddr) <= avail_end)
789 uvm_pagefree(&pgs[lcv]);
790 }
791 /* XXXCDC: incomplete: need to update uvmexp.free, what else? */
792 /* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
793 #endif
794 } else {
795 pgs = NULL;
796 npages = 0;
797 }
798
799 /*
800 * now insert us in the proper place in vm_physmem[]
801 */
802
803 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
804 /* random: put it at the end (easy!) */
805 ps = &vm_physmem[vm_nphysseg];
806 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
807 {
808 int x;
809 /* sort by address for binary search */
810 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
811 if (start < vm_physmem[lcv].start)
812 break;
813 ps = &vm_physmem[lcv];
814 /* move back other entries, if necessary ... */
815 for (x = vm_nphysseg ; x > lcv ; x--)
816 /* structure copy */
817 vm_physmem[x] = vm_physmem[x - 1];
818 }
819 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
820 {
821 int x;
822 /* sort by largest segment first */
823 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
824 if ((end - start) >
825 (vm_physmem[lcv].end - vm_physmem[lcv].start))
826 break;
827 ps = &vm_physmem[lcv];
828 /* move back other entries, if necessary ... */
829 for (x = vm_nphysseg ; x > lcv ; x--)
830 /* structure copy */
831 vm_physmem[x] = vm_physmem[x - 1];
832 }
833 #else
834 panic("uvm_page_physload: unknown physseg strategy selected!");
835 #endif
836
837 ps->start = start;
838 ps->end = end;
839 ps->avail_start = avail_start;
840 ps->avail_end = avail_end;
841 if (preload) {
842 ps->pgs = NULL;
843 } else {
844 ps->pgs = pgs;
845 ps->lastpg = pgs + npages - 1;
846 }
847 ps->free_list = free_list;
848 vm_nphysseg++;
849
850 if (!preload) {
851 uvmpdpol_reinit();
852 }
853 }
854
855 /*
856 * uvm_page_recolor: Recolor the pages if the new bucket count is
857 * larger than the old one.
858 */
859
860 void
861 uvm_page_recolor(int newncolors)
862 {
863 struct pgflbucket *bucketarray, *cpuarray, *oldbucketarray;
864 struct pgfreelist gpgfl, pgfl;
865 struct vm_page *pg;
866 vsize_t bucketcount;
867 int lcv, color, i, ocolors;
868 struct uvm_cpu *ucpu;
869
870 if (newncolors <= uvmexp.ncolors)
871 return;
872
873 if (uvm.page_init_done == false) {
874 uvmexp.ncolors = newncolors;
875 return;
876 }
877
878 bucketcount = newncolors * VM_NFREELIST;
879 bucketarray = malloc(bucketcount * sizeof(struct pgflbucket) * 2,
880 M_VMPAGE, M_NOWAIT);
881 cpuarray = bucketarray + bucketcount;
882 if (bucketarray == NULL) {
883 printf("WARNING: unable to allocate %ld page color buckets\n",
884 (long) bucketcount);
885 return;
886 }
887
888 mutex_spin_enter(&uvm_fpageqlock);
889
890 /* Make sure we should still do this. */
891 if (newncolors <= uvmexp.ncolors) {
892 mutex_spin_exit(&uvm_fpageqlock);
893 free(bucketarray, M_VMPAGE);
894 return;
895 }
896
897 oldbucketarray = uvm.page_free[0].pgfl_buckets;
898 ocolors = uvmexp.ncolors;
899
900 uvmexp.ncolors = newncolors;
901 uvmexp.colormask = uvmexp.ncolors - 1;
902
903 ucpu = curcpu()->ci_data.cpu_uvm;
904 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
905 gpgfl.pgfl_buckets = (bucketarray + (lcv * newncolors));
906 pgfl.pgfl_buckets = (cpuarray + (lcv * uvmexp.ncolors));
907 uvm_page_init_buckets(&gpgfl);
908 uvm_page_init_buckets(&pgfl);
909 for (color = 0; color < ocolors; color++) {
910 for (i = 0; i < PGFL_NQUEUES; i++) {
911 while ((pg = LIST_FIRST(&uvm.page_free[
912 lcv].pgfl_buckets[color].pgfl_queues[i]))
913 != NULL) {
914 LIST_REMOVE(pg, pageq.list); /* global */
915 LIST_REMOVE(pg, listq.list); /* cpu */
916 LIST_INSERT_HEAD(&gpgfl.pgfl_buckets[
917 VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
918 i], pg, pageq.list);
919 LIST_INSERT_HEAD(&pgfl.pgfl_buckets[
920 VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
921 i], pg, listq.list);
922 }
923 }
924 }
925 uvm.page_free[lcv].pgfl_buckets = gpgfl.pgfl_buckets;
926 ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
927 }
928
929 if (have_recolored_pages) {
930 mutex_spin_exit(&uvm_fpageqlock);
931 free(oldbucketarray, M_VMPAGE);
932 return;
933 }
934
935 have_recolored_pages = true;
936 mutex_spin_exit(&uvm_fpageqlock);
937 }
938
939 /*
940 * uvm_cpu_attach: initialize per-CPU data structures.
941 */
942
943 void
944 uvm_cpu_attach(struct cpu_info *ci)
945 {
946 struct pgflbucket *bucketarray;
947 struct pgfreelist pgfl;
948 struct uvm_cpu *ucpu;
949 vsize_t bucketcount;
950 int lcv;
951
952 if (CPU_IS_PRIMARY(ci)) {
953 /* Already done in uvm_page_init(). */
954 return;
955 }
956
957 bucketcount = uvmexp.ncolors * VM_NFREELIST;
958 bucketarray = malloc(bucketcount * sizeof(struct pgflbucket),
959 M_VMPAGE, M_WAITOK);
960 ucpu = &uvm.cpus[cpu_index(ci)];
961 ci->ci_data.cpu_uvm = ucpu;
962 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
963 pgfl.pgfl_buckets = (bucketarray + (lcv * uvmexp.ncolors));
964 uvm_page_init_buckets(&pgfl);
965 ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
966 }
967 }
968
969 /*
970 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat
971 */
972
973 static struct vm_page *
974 uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int flist, int try1, int try2,
975 int *trycolorp)
976 {
977 struct pgflist *freeq;
978 struct vm_page *pg;
979 int color, trycolor = *trycolorp;
980 struct pgfreelist *gpgfl, *pgfl;
981
982 KASSERT(mutex_owned(&uvm_fpageqlock));
983
984 color = trycolor;
985 pgfl = &ucpu->page_free[flist];
986 gpgfl = &uvm.page_free[flist];
987 do {
988 /* cpu, try1 */
989 if ((pg = LIST_FIRST((freeq =
990 &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) {
991 VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--;
992 uvmexp.cpuhit++;
993 goto gotit;
994 }
995 /* global, try1 */
996 if ((pg = LIST_FIRST((freeq =
997 &gpgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) {
998 VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--;
999 uvmexp.cpumiss++;
1000 goto gotit;
1001 }
1002 /* cpu, try2 */
1003 if ((pg = LIST_FIRST((freeq =
1004 &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) {
1005 VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--;
1006 uvmexp.cpuhit++;
1007 goto gotit;
1008 }
1009 /* global, try2 */
1010 if ((pg = LIST_FIRST((freeq =
1011 &gpgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) {
1012 VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--;
1013 uvmexp.cpumiss++;
1014 goto gotit;
1015 }
1016 color = (color + 1) & uvmexp.colormask;
1017 } while (color != trycolor);
1018
1019 return (NULL);
1020
1021 gotit:
1022 LIST_REMOVE(pg, pageq.list); /* global list */
1023 LIST_REMOVE(pg, listq.list); /* per-cpu list */
1024 uvmexp.free--;
1025
1026 /* update zero'd page count */
1027 if (pg->flags & PG_ZERO)
1028 uvmexp.zeropages--;
1029
1030 if (color == trycolor)
1031 uvmexp.colorhit++;
1032 else {
1033 uvmexp.colormiss++;
1034 *trycolorp = color;
1035 }
1036
1037 return (pg);
1038 }
1039
1040 /*
1041 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
1042 *
1043 * => return null if no pages free
1044 * => wake up pagedaemon if number of free pages drops below low water mark
1045 * => if obj != NULL, obj must be locked (to put in obj's tree)
1046 * => if anon != NULL, anon must be locked (to put in anon)
1047 * => only one of obj or anon can be non-null
1048 * => caller must activate/deactivate page if it is not wired.
1049 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
1050 * => policy decision: it is more important to pull a page off of the
1051 * appropriate priority free list than it is to get a zero'd or
1052 * unknown contents page. This is because we live with the
1053 * consequences of a bad free list decision for the entire
1054 * lifetime of the page, e.g. if the page comes from memory that
1055 * is slower to access.
1056 */
1057
1058 struct vm_page *
1059 uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
1060 int flags, int strat, int free_list)
1061 {
1062 int lcv, try1, try2, zeroit = 0, color;
1063 struct uvm_cpu *ucpu;
1064 struct vm_page *pg;
1065 bool use_reserve;
1066
1067 KASSERT(obj == NULL || anon == NULL);
1068 KASSERT(anon == NULL || off == 0);
1069 KASSERT(off == trunc_page(off));
1070 KASSERT(obj == NULL || mutex_owned(&obj->vmobjlock));
1071 KASSERT(anon == NULL || mutex_owned(&anon->an_lock));
1072
1073 mutex_spin_enter(&uvm_fpageqlock);
1074
1075 /*
1076 * This implements a global round-robin page coloring
1077 * algorithm.
1078 *
1079 * XXXJRT: What about virtually-indexed caches?
1080 */
1081
1082 ucpu = curcpu()->ci_data.cpu_uvm;
1083 color = ucpu->page_free_nextcolor;
1084
1085 /*
1086 * check to see if we need to generate some free pages waking
1087 * the pagedaemon.
1088 */
1089
1090 uvm_kick_pdaemon();
1091
1092 /*
1093 * fail if any of these conditions is true:
1094 * [1] there really are no free pages, or
1095 * [2] only kernel "reserved" pages remain and
1096 * the page isn't being allocated to a kernel object.
1097 * [3] only pagedaemon "reserved" pages remain and
1098 * the requestor isn't the pagedaemon.
1099 */
1100
1101 use_reserve = (flags & UVM_PGA_USERESERVE) ||
1102 (obj && UVM_OBJ_IS_KERN_OBJECT(obj));
1103 if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) ||
1104 (uvmexp.free <= uvmexp.reserve_pagedaemon &&
1105 !(use_reserve && curlwp == uvm.pagedaemon_lwp)))
1106 goto fail;
1107
1108 #if PGFL_NQUEUES != 2
1109 #error uvm_pagealloc_strat needs to be updated
1110 #endif
1111
1112 /*
1113 * If we want a zero'd page, try the ZEROS queue first, otherwise
1114 * we try the UNKNOWN queue first.
1115 */
1116 if (flags & UVM_PGA_ZERO) {
1117 try1 = PGFL_ZEROS;
1118 try2 = PGFL_UNKNOWN;
1119 } else {
1120 try1 = PGFL_UNKNOWN;
1121 try2 = PGFL_ZEROS;
1122 }
1123
1124 again:
1125 switch (strat) {
1126 case UVM_PGA_STRAT_NORMAL:
1127 /* Check all freelists in descending priority order. */
1128 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
1129 pg = uvm_pagealloc_pgfl(ucpu, lcv,
1130 try1, try2, &color);
1131 if (pg != NULL)
1132 goto gotit;
1133 }
1134
1135 /* No pages free! */
1136 goto fail;
1137
1138 case UVM_PGA_STRAT_ONLY:
1139 case UVM_PGA_STRAT_FALLBACK:
1140 /* Attempt to allocate from the specified free list. */
1141 KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
1142 pg = uvm_pagealloc_pgfl(ucpu, free_list,
1143 try1, try2, &color);
1144 if (pg != NULL)
1145 goto gotit;
1146
1147 /* Fall back, if possible. */
1148 if (strat == UVM_PGA_STRAT_FALLBACK) {
1149 strat = UVM_PGA_STRAT_NORMAL;
1150 goto again;
1151 }
1152
1153 /* No pages free! */
1154 goto fail;
1155
1156 default:
1157 panic("uvm_pagealloc_strat: bad strat %d", strat);
1158 /* NOTREACHED */
1159 }
1160
1161 gotit:
1162 /*
1163 * We now know which color we actually allocated from; set
1164 * the next color accordingly.
1165 */
1166
1167 ucpu->page_free_nextcolor = (color + 1) & uvmexp.colormask;
1168
1169 /*
1170 * update allocation statistics and remember if we have to
1171 * zero the page
1172 */
1173
1174 if (flags & UVM_PGA_ZERO) {
1175 if (pg->flags & PG_ZERO) {
1176 uvmexp.pga_zerohit++;
1177 zeroit = 0;
1178 } else {
1179 uvmexp.pga_zeromiss++;
1180 zeroit = 1;
1181 }
1182 if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) {
1183 ucpu->page_idle_zero = vm_page_zero_enable;
1184 }
1185 }
1186 mutex_spin_exit(&uvm_fpageqlock);
1187
1188 pg->offset = off;
1189 pg->uobject = obj;
1190 pg->uanon = anon;
1191 pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
1192 if (anon) {
1193 anon->an_page = pg;
1194 pg->pqflags = PQ_ANON;
1195 atomic_inc_uint(&uvmexp.anonpages);
1196 } else {
1197 if (obj) {
1198 uvm_pageinsert(pg);
1199 }
1200 pg->pqflags = 0;
1201 }
1202 #if defined(UVM_PAGE_TRKOWN)
1203 pg->owner_tag = NULL;
1204 #endif
1205 UVM_PAGE_OWN(pg, "new alloc");
1206
1207 if (flags & UVM_PGA_ZERO) {
1208 /*
1209 * A zero'd page is not clean. If we got a page not already
1210 * zero'd, then we have to zero it ourselves.
1211 */
1212 pg->flags &= ~PG_CLEAN;
1213 if (zeroit)
1214 pmap_zero_page(VM_PAGE_TO_PHYS(pg));
1215 }
1216
1217 return(pg);
1218
1219 fail:
1220 mutex_spin_exit(&uvm_fpageqlock);
1221 return (NULL);
1222 }
1223
1224 /*
1225 * uvm_pagereplace: replace a page with another
1226 *
1227 * => object must be locked
1228 */
1229
1230 void
1231 uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg)
1232 {
1233 struct uvm_object *uobj = oldpg->uobject;
1234
1235 KASSERT((oldpg->flags & PG_TABLED) != 0);
1236 KASSERT(uobj != NULL);
1237 KASSERT((newpg->flags & PG_TABLED) == 0);
1238 KASSERT(newpg->uobject == NULL);
1239 KASSERT(mutex_owned(&uobj->vmobjlock));
1240
1241 newpg->uobject = uobj;
1242 newpg->offset = oldpg->offset;
1243
1244 uvm_pageremove_tree(uobj, oldpg);
1245 uvm_pageinsert_tree(uobj, newpg);
1246 uvm_pageinsert_list(uobj, newpg, oldpg);
1247 uvm_pageremove_list(uobj, oldpg);
1248 }
1249
1250 /*
1251 * uvm_pagerealloc: reallocate a page from one object to another
1252 *
1253 * => both objects must be locked
1254 */
1255
1256 void
1257 uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
1258 {
1259 /*
1260 * remove it from the old object
1261 */
1262
1263 if (pg->uobject) {
1264 uvm_pageremove(pg);
1265 }
1266
1267 /*
1268 * put it in the new object
1269 */
1270
1271 if (newobj) {
1272 pg->uobject = newobj;
1273 pg->offset = newoff;
1274 uvm_pageinsert(pg);
1275 }
1276 }
1277
1278 #ifdef DEBUG
1279 /*
1280 * check if page is zero-filled
1281 *
1282 * - called with free page queue lock held.
1283 */
1284 void
1285 uvm_pagezerocheck(struct vm_page *pg)
1286 {
1287 int *p, *ep;
1288
1289 KASSERT(uvm_zerocheckkva != 0);
1290 KASSERT(mutex_owned(&uvm_fpageqlock));
1291
1292 /*
1293 * XXX assuming pmap_kenter_pa and pmap_kremove never call
1294 * uvm page allocator.
1295 *
1296 * it might be better to have "CPU-local temporary map" pmap interface.
1297 */
1298 pmap_kenter_pa(uvm_zerocheckkva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ);
1299 p = (int *)uvm_zerocheckkva;
1300 ep = (int *)((char *)p + PAGE_SIZE);
1301 pmap_update(pmap_kernel());
1302 while (p < ep) {
1303 if (*p != 0)
1304 panic("PG_ZERO page isn't zero-filled");
1305 p++;
1306 }
1307 pmap_kremove(uvm_zerocheckkva, PAGE_SIZE);
1308 /*
1309 * pmap_update() is not necessary here because no one except us
1310 * uses this VA.
1311 */
1312 }
1313 #endif /* DEBUG */
1314
1315 /*
1316 * uvm_pagefree: free page
1317 *
1318 * => erase page's identity (i.e. remove from object)
1319 * => put page on free list
1320 * => caller must lock owning object (either anon or uvm_object)
1321 * => caller must lock page queues
1322 * => assumes all valid mappings of pg are gone
1323 */
1324
1325 void
1326 uvm_pagefree(struct vm_page *pg)
1327 {
1328 struct pgflist *pgfl;
1329 struct uvm_cpu *ucpu;
1330 int index, color, queue;
1331 bool iszero;
1332
1333 #ifdef DEBUG
1334 if (pg->uobject == (void *)0xdeadbeef &&
1335 pg->uanon == (void *)0xdeadbeef) {
1336 panic("uvm_pagefree: freeing free page %p", pg);
1337 }
1338 #endif /* DEBUG */
1339
1340 KASSERT((pg->flags & PG_PAGEOUT) == 0);
1341 KASSERT(mutex_owned(&uvm_pageqlock) || !uvmpdpol_pageisqueued_p(pg));
1342 KASSERT(pg->uobject == NULL || mutex_owned(&pg->uobject->vmobjlock));
1343 KASSERT(pg->uobject != NULL || pg->uanon == NULL ||
1344 mutex_owned(&pg->uanon->an_lock));
1345
1346 /*
1347 * if the page is loaned, resolve the loan instead of freeing.
1348 */
1349
1350 if (pg->loan_count) {
1351 KASSERT(pg->wire_count == 0);
1352
1353 /*
1354 * if the page is owned by an anon then we just want to
1355 * drop anon ownership. the kernel will free the page when
1356 * it is done with it. if the page is owned by an object,
1357 * remove it from the object and mark it dirty for the benefit
1358 * of possible anon owners.
1359 *
1360 * regardless of previous ownership, wakeup any waiters,
1361 * unbusy the page, and we're done.
1362 */
1363
1364 if (pg->uobject != NULL) {
1365 uvm_pageremove(pg);
1366 pg->flags &= ~PG_CLEAN;
1367 } else if (pg->uanon != NULL) {
1368 if ((pg->pqflags & PQ_ANON) == 0) {
1369 pg->loan_count--;
1370 } else {
1371 pg->pqflags &= ~PQ_ANON;
1372 atomic_dec_uint(&uvmexp.anonpages);
1373 }
1374 pg->uanon->an_page = NULL;
1375 pg->uanon = NULL;
1376 }
1377 if (pg->flags & PG_WANTED) {
1378 wakeup(pg);
1379 }
1380 pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1);
1381 #ifdef UVM_PAGE_TRKOWN
1382 pg->owner_tag = NULL;
1383 #endif
1384 if (pg->loan_count) {
1385 KASSERT(pg->uobject == NULL);
1386 if (pg->uanon == NULL) {
1387 uvm_pagedequeue(pg);
1388 }
1389 return;
1390 }
1391 }
1392
1393 /*
1394 * remove page from its object or anon.
1395 */
1396
1397 if (pg->uobject != NULL) {
1398 uvm_pageremove(pg);
1399 } else if (pg->uanon != NULL) {
1400 pg->uanon->an_page = NULL;
1401 atomic_dec_uint(&uvmexp.anonpages);
1402 }
1403
1404 /*
1405 * now remove the page from the queues.
1406 */
1407
1408 uvm_pagedequeue(pg);
1409
1410 /*
1411 * if the page was wired, unwire it now.
1412 */
1413
1414 if (pg->wire_count) {
1415 pg->wire_count = 0;
1416 uvmexp.wired--;
1417 }
1418
1419 /*
1420 * and put on free queue
1421 */
1422
1423 iszero = (pg->flags & PG_ZERO);
1424 index = uvm_page_lookup_freelist(pg);
1425 color = VM_PGCOLOR_BUCKET(pg);
1426 queue = (iszero ? PGFL_ZEROS : PGFL_UNKNOWN);
1427
1428 pg->pqflags = PQ_FREE;
1429 #ifdef DEBUG
1430 pg->uobject = (void *)0xdeadbeef;
1431 pg->uanon = (void *)0xdeadbeef;
1432 #endif
1433
1434 mutex_spin_enter(&uvm_fpageqlock);
1435
1436 #ifdef DEBUG
1437 if (iszero)
1438 uvm_pagezerocheck(pg);
1439 #endif /* DEBUG */
1440
1441
1442 /* global list */
1443 pgfl = &uvm.page_free[index].pgfl_buckets[color].pgfl_queues[queue];
1444 LIST_INSERT_HEAD(pgfl, pg, pageq.list);
1445 uvmexp.free++;
1446 if (iszero) {
1447 uvmexp.zeropages++;
1448 }
1449
1450 /* per-cpu list */
1451 ucpu = curcpu()->ci_data.cpu_uvm;
1452 pg->offset = (uintptr_t)ucpu;
1453 pgfl = &ucpu->page_free[index].pgfl_buckets[color].pgfl_queues[queue];
1454 LIST_INSERT_HEAD(pgfl, pg, listq.list);
1455 ucpu->pages[queue]++;
1456 if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) {
1457 ucpu->page_idle_zero = vm_page_zero_enable;
1458 }
1459
1460 mutex_spin_exit(&uvm_fpageqlock);
1461 }
1462
1463 /*
1464 * uvm_page_unbusy: unbusy an array of pages.
1465 *
1466 * => pages must either all belong to the same object, or all belong to anons.
1467 * => if pages are object-owned, object must be locked.
1468 * => if pages are anon-owned, anons must be locked.
1469 * => caller must lock page queues if pages may be released.
1470 * => caller must make sure that anon-owned pages are not PG_RELEASED.
1471 */
1472
1473 void
1474 uvm_page_unbusy(struct vm_page **pgs, int npgs)
1475 {
1476 struct vm_page *pg;
1477 int i;
1478 UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);
1479
1480 for (i = 0; i < npgs; i++) {
1481 pg = pgs[i];
1482 if (pg == NULL || pg == PGO_DONTCARE) {
1483 continue;
1484 }
1485
1486 KASSERT(pg->uobject == NULL ||
1487 mutex_owned(&pg->uobject->vmobjlock));
1488 KASSERT(pg->uobject != NULL ||
1489 (pg->uanon != NULL && mutex_owned(&pg->uanon->an_lock)));
1490
1491 KASSERT(pg->flags & PG_BUSY);
1492 KASSERT((pg->flags & PG_PAGEOUT) == 0);
1493 if (pg->flags & PG_WANTED) {
1494 wakeup(pg);
1495 }
1496 if (pg->flags & PG_RELEASED) {
1497 UVMHIST_LOG(ubchist, "releasing pg %p", pg,0,0,0);
1498 KASSERT(pg->uobject != NULL ||
1499 (pg->uanon != NULL && pg->uanon->an_ref > 0));
1500 pg->flags &= ~PG_RELEASED;
1501 uvm_pagefree(pg);
1502 } else {
1503 UVMHIST_LOG(ubchist, "unbusying pg %p", pg,0,0,0);
1504 pg->flags &= ~(PG_WANTED|PG_BUSY);
1505 UVM_PAGE_OWN(pg, NULL);
1506 }
1507 }
1508 }
1509
1510 #if defined(UVM_PAGE_TRKOWN)
1511 /*
1512 * uvm_page_own: set or release page ownership
1513 *
1514 * => this is a debugging function that keeps track of who sets PG_BUSY
1515 * and where they do it. it can be used to track down problems
1516 * such a process setting "PG_BUSY" and never releasing it.
1517 * => page's object [if any] must be locked
1518 * => if "tag" is NULL then we are releasing page ownership
1519 */
1520 void
1521 uvm_page_own(struct vm_page *pg, const char *tag)
1522 {
1523 struct uvm_object *uobj;
1524 struct vm_anon *anon;
1525
1526 KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);
1527
1528 uobj = pg->uobject;
1529 anon = pg->uanon;
1530 if (uobj != NULL) {
1531 KASSERT(mutex_owned(&uobj->vmobjlock));
1532 } else if (anon != NULL) {
1533 KASSERT(mutex_owned(&anon->an_lock));
1534 }
1535
1536 KASSERT((pg->flags & PG_WANTED) == 0);
1537
1538 /* gain ownership? */
1539 if (tag) {
1540 KASSERT((pg->flags & PG_BUSY) != 0);
1541 if (pg->owner_tag) {
1542 printf("uvm_page_own: page %p already owned "
1543 "by proc %d [%s]\n", pg,
1544 pg->owner, pg->owner_tag);
1545 panic("uvm_page_own");
1546 }
1547 pg->owner = (curproc) ? curproc->p_pid : (pid_t) -1;
1548 pg->lowner = (curlwp) ? curlwp->l_lid : (lwpid_t) -1;
1549 pg->owner_tag = tag;
1550 return;
1551 }
1552
1553 /* drop ownership */
1554 KASSERT((pg->flags & PG_BUSY) == 0);
1555 if (pg->owner_tag == NULL) {
1556 printf("uvm_page_own: dropping ownership of an non-owned "
1557 "page (%p)\n", pg);
1558 panic("uvm_page_own");
1559 }
1560 if (!uvmpdpol_pageisqueued_p(pg)) {
1561 KASSERT((pg->uanon == NULL && pg->uobject == NULL) ||
1562 pg->wire_count > 0);
1563 } else {
1564 KASSERT(pg->wire_count == 0);
1565 }
1566 pg->owner_tag = NULL;
1567 }
1568 #endif
1569
1570 /*
1571 * uvm_pageidlezero: zero free pages while the system is idle.
1572 *
1573 * => try to complete one color bucket at a time, to reduce our impact
1574 * on the CPU cache.
1575 * => we loop until we either reach the target or there is a lwp ready
1576 * to run, or MD code detects a reason to break early.
1577 */
1578 void
1579 uvm_pageidlezero(void)
1580 {
1581 struct vm_page *pg;
1582 struct pgfreelist *pgfl, *gpgfl;
1583 struct uvm_cpu *ucpu;
1584 int free_list, firstbucket, nextbucket;
1585
1586 ucpu = curcpu()->ci_data.cpu_uvm;
1587 if (!ucpu->page_idle_zero ||
1588 ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {
1589 ucpu->page_idle_zero = false;
1590 return;
1591 }
1592 mutex_enter(&uvm_fpageqlock);
1593 firstbucket = ucpu->page_free_nextcolor;
1594 nextbucket = firstbucket;
1595 do {
1596 for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
1597 if (sched_curcpu_runnable_p()) {
1598 goto quit;
1599 }
1600 pgfl = &ucpu->page_free[free_list];
1601 gpgfl = &uvm.page_free[free_list];
1602 while ((pg = LIST_FIRST(&pgfl->pgfl_buckets[
1603 nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) {
1604 if (sched_curcpu_runnable_p()) {
1605 goto quit;
1606 }
1607 LIST_REMOVE(pg, pageq.list); /* global list */
1608 LIST_REMOVE(pg, listq.list); /* per-cpu list */
1609 ucpu->pages[PGFL_UNKNOWN]--;
1610 uvmexp.free--;
1611 mutex_spin_exit(&uvm_fpageqlock);
1612 #ifdef PMAP_PAGEIDLEZERO
1613 if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) {
1614
1615 /*
1616 * The machine-dependent code detected
1617 * some reason for us to abort zeroing
1618 * pages, probably because there is a
1619 * process now ready to run.
1620 */
1621
1622 mutex_spin_enter(&uvm_fpageqlock);
1623 LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[
1624 nextbucket].pgfl_queues[
1625 PGFL_UNKNOWN], pg, pageq.list);
1626 LIST_INSERT_HEAD(&pgfl->pgfl_buckets[
1627 nextbucket].pgfl_queues[
1628 PGFL_UNKNOWN], pg, listq.list);
1629 ucpu->pages[PGFL_UNKNOWN]++;
1630 uvmexp.free++;
1631 uvmexp.zeroaborts++;
1632 goto quit;
1633 }
1634 #else
1635 pmap_zero_page(VM_PAGE_TO_PHYS(pg));
1636 #endif /* PMAP_PAGEIDLEZERO */
1637 pg->flags |= PG_ZERO;
1638
1639 mutex_spin_enter(&uvm_fpageqlock);
1640 LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[
1641 nextbucket].pgfl_queues[PGFL_ZEROS],
1642 pg, pageq.list);
1643 LIST_INSERT_HEAD(&pgfl->pgfl_buckets[
1644 nextbucket].pgfl_queues[PGFL_ZEROS],
1645 pg, listq.list);
1646 ucpu->pages[PGFL_ZEROS]++;
1647 uvmexp.free++;
1648 uvmexp.zeropages++;
1649 }
1650 }
1651 if (ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {
1652 break;
1653 }
1654 nextbucket = (nextbucket + 1) & uvmexp.colormask;
1655 } while (nextbucket != firstbucket);
1656 ucpu->page_idle_zero = false;
1657 quit:
1658 mutex_spin_exit(&uvm_fpageqlock);
1659 }
1660
1661 /*
1662 * uvm_pagelookup: look up a page
1663 *
1664 * => caller should lock object to keep someone from pulling the page
1665 * out from under it
1666 */
1667
1668 struct vm_page *
1669 uvm_pagelookup(struct uvm_object *obj, voff_t off)
1670 {
1671 struct vm_page *pg;
1672
1673 KASSERT(mutex_owned(&obj->vmobjlock));
1674
1675 pg = (struct vm_page *)rb_tree_find_node(&obj->rb_tree, &off);
1676
1677 KASSERT(pg == NULL || obj->uo_npages != 0);
1678 KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
1679 (pg->flags & PG_BUSY) != 0);
1680 return(pg);
1681 }
1682
1683 /*
1684 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
1685 *
1686 * => caller must lock page queues
1687 */
1688
1689 void
1690 uvm_pagewire(struct vm_page *pg)
1691 {
1692 KASSERT(mutex_owned(&uvm_pageqlock));
1693 #if defined(READAHEAD_STATS)
1694 if ((pg->pqflags & PQ_READAHEAD) != 0) {
1695 uvm_ra_hit.ev_count++;
1696 pg->pqflags &= ~PQ_READAHEAD;
1697 }
1698 #endif /* defined(READAHEAD_STATS) */
1699 if (pg->wire_count == 0) {
1700 uvm_pagedequeue(pg);
1701 uvmexp.wired++;
1702 }
1703 pg->wire_count++;
1704 }
1705
1706 /*
1707 * uvm_pageunwire: unwire the page.
1708 *
1709 * => activate if wire count goes to zero.
1710 * => caller must lock page queues
1711 */
1712
1713 void
1714 uvm_pageunwire(struct vm_page *pg)
1715 {
1716 KASSERT(mutex_owned(&uvm_pageqlock));
1717 pg->wire_count--;
1718 if (pg->wire_count == 0) {
1719 uvm_pageactivate(pg);
1720 uvmexp.wired--;
1721 }
1722 }
1723
1724 /*
1725 * uvm_pagedeactivate: deactivate page
1726 *
1727 * => caller must lock page queues
1728 * => caller must check to make sure page is not wired
1729 * => object that page belongs to must be locked (so we can adjust pg->flags)
1730 * => caller must clear the reference on the page before calling
1731 */
1732
1733 void
1734 uvm_pagedeactivate(struct vm_page *pg)
1735 {
1736
1737 KASSERT(mutex_owned(&uvm_pageqlock));
1738 KASSERT(pg->wire_count != 0 || uvmpdpol_pageisqueued_p(pg));
1739 uvmpdpol_pagedeactivate(pg);
1740 }
1741
1742 /*
1743 * uvm_pageactivate: activate page
1744 *
1745 * => caller must lock page queues
1746 */
1747
1748 void
1749 uvm_pageactivate(struct vm_page *pg)
1750 {
1751
1752 KASSERT(mutex_owned(&uvm_pageqlock));
1753 #if defined(READAHEAD_STATS)
1754 if ((pg->pqflags & PQ_READAHEAD) != 0) {
1755 uvm_ra_hit.ev_count++;
1756 pg->pqflags &= ~PQ_READAHEAD;
1757 }
1758 #endif /* defined(READAHEAD_STATS) */
1759 if (pg->wire_count != 0) {
1760 return;
1761 }
1762 uvmpdpol_pageactivate(pg);
1763 }
1764
1765 /*
1766 * uvm_pagedequeue: remove a page from any paging queue
1767 */
1768
1769 void
1770 uvm_pagedequeue(struct vm_page *pg)
1771 {
1772
1773 if (uvmpdpol_pageisqueued_p(pg)) {
1774 KASSERT(mutex_owned(&uvm_pageqlock));
1775 }
1776
1777 uvmpdpol_pagedequeue(pg);
1778 }
1779
1780 /*
1781 * uvm_pageenqueue: add a page to a paging queue without activating.
1782 * used where a page is not really demanded (yet). eg. read-ahead
1783 */
1784
1785 void
1786 uvm_pageenqueue(struct vm_page *pg)
1787 {
1788
1789 KASSERT(mutex_owned(&uvm_pageqlock));
1790 if (pg->wire_count != 0) {
1791 return;
1792 }
1793 uvmpdpol_pageenqueue(pg);
1794 }
1795
1796 /*
1797 * uvm_pagezero: zero fill a page
1798 *
1799 * => if page is part of an object then the object should be locked
1800 * to protect pg->flags.
1801 */
1802
1803 void
1804 uvm_pagezero(struct vm_page *pg)
1805 {
1806 pg->flags &= ~PG_CLEAN;
1807 pmap_zero_page(VM_PAGE_TO_PHYS(pg));
1808 }
1809
1810 /*
1811 * uvm_pagecopy: copy a page
1812 *
1813 * => if page is part of an object then the object should be locked
1814 * to protect pg->flags.
1815 */
1816
1817 void
1818 uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
1819 {
1820
1821 dst->flags &= ~PG_CLEAN;
1822 pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
1823 }
1824
1825 /*
1826 * uvm_page_lookup_freelist: look up the free list for the specified page
1827 */
1828
1829 int
1830 uvm_page_lookup_freelist(struct vm_page *pg)
1831 {
1832 int lcv;
1833
1834 lcv = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
1835 KASSERT(lcv != -1);
1836 return (vm_physmem[lcv].free_list);
1837 }
1838