1 /* $NetBSD: uvm_page.c,v 1.146 2009/08/10 23:17:29 haad Exp $ */
2
3 /*
4 * Copyright (c) 1997 Charles D. Cranor and Washington University.
5 * Copyright (c) 1991, 1993, The Regents of the University of California.
6 *
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * The Mach Operating System project at Carnegie-Mellon University.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by Charles D. Cranor,
23 * Washington University, the University of California, Berkeley and
24 * its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)vm_page.c 8.3 (Berkeley) 3/21/94
42 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
43 *
44 *
45 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
46 * All rights reserved.
47 *
48 * Permission to use, copy, modify and distribute this software and
49 * its documentation is hereby granted, provided that both the copyright
50 * notice and this permission notice appear in all copies of the
51 * software, derivative works or modified versions, and any portions
52 * thereof, and that both notices appear in supporting documentation.
53 *
54 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
55 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
56 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
57 *
58 * Carnegie Mellon requests users of this software to return to
59 *
60 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
61 * School of Computer Science
62 * Carnegie Mellon University
63 * Pittsburgh PA 15213-3890
64 *
65 * any improvements or extensions that they make and grant Carnegie the
66 * rights to redistribute these changes.
67 */
68
69 /*
70 * uvm_page.c: page ops.
71 */
72
73 #include <sys/cdefs.h>
74 __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.146 2009/08/10 23:17:29 haad Exp $");
75
76 #include "opt_uvmhist.h"
77 #include "opt_readahead.h"
78
79 #include <sys/param.h>
80 #include <sys/systm.h>
81 #include <sys/malloc.h>
82 #include <sys/sched.h>
83 #include <sys/kernel.h>
84 #include <sys/vnode.h>
85 #include <sys/proc.h>
86 #include <sys/atomic.h>
87 #include <sys/cpu.h>
88
89 #include <uvm/uvm.h>
90 #include <uvm/uvm_pdpolicy.h>
91
92 /*
93 * global vars... XXXCDC: move to uvm. structure.
94 */
95
96 /*
97 * physical memory config is stored in vm_physmem.
98 */
99
100 struct vm_physseg vm_physmem[VM_PHYSSEG_MAX]; /* XXXCDC: uvm.physmem */
101 int vm_nphysseg = 0; /* XXXCDC: uvm.nphysseg */
102
103 /*
104 * Some supported CPUs in a given architecture don't support all
105 * of the things necessary to do idle page zero'ing efficiently.
106 * We therefore provide a way to disable it from machdep code here.
107 */
108 /*
109 * XXX disabled until we can find a way to do this without causing
110 * problems for either CPU caches or DMA latency.
111 */
112 bool vm_page_zero_enable = false;
113
114 /*
115 * number of pages per-CPU to reserve for the kernel.
116 */
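/*
 * uvm_page_init() seeds uvmexp.reserve_kernel with this value, and
 * uvm_cpu_attach() adds the same amount again for each additional CPU.
 */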
117 int vm_page_reserve_kernel = 5;
118
119 /* Physical memory size */
120 uintptr_t physmem;
121
122 /*
123 * local variables
124 */
125
126 /*
127 * these variables record the values returned by vm_page_bootstrap,
128 * for debugging purposes. The implementation of uvm_pageboot_alloc
129 * and pmap_startup here also uses them internally.
130 */
131
132 static vaddr_t virtual_space_start;
133 static vaddr_t virtual_space_end;
134
135 /*
136 * we allocate an initial number of page colors in uvm_page_init(),
137 * and remember them. We may re-color pages as cache sizes are
138 * discovered during the autoconfiguration phase. But we can never
139 * free the initial set of buckets, since they are allocated using
140 * uvm_pageboot_alloc().
141 */
142
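/*
 * uvm_page_recolor() checks this flag: on the first re-coloring the
 * old (bootstrap) bucket array is simply abandoned, while any later
 * bucket arrays were obtained with malloc() and can be freed.
 */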
143 static bool have_recolored_pages /* = false */;
144
145 MALLOC_DEFINE(M_VMPAGE, "VM page", "VM page");
146
147 #ifdef DEBUG
148 vaddr_t uvm_zerocheckkva;
149 #endif /* DEBUG */
150
151 /*
152 * local prototypes
153 */
154
155 static void uvm_pageinsert(struct vm_page *);
156 static void uvm_pageremove(struct vm_page *);
157
158 /*
159 * per-object tree of pages
160 */
161
162 static signed int
163 uvm_page_compare_nodes(const struct rb_node *n1, const struct rb_node *n2)
164 {
165 const struct vm_page *pg1 = (const void *)n1;
166 const struct vm_page *pg2 = (const void *)n2;
167 const voff_t a = pg1->offset;
168 const voff_t b = pg2->offset;
169
170 if (a < b)
171 return 1;
172 if (a > b)
173 return -1;
174 return 0;
175 }
176
177 static signed int
178 uvm_page_compare_key(const struct rb_node *n, const void *key)
179 {
180 const struct vm_page *pg = (const void *)n;
181 const voff_t a = pg->offset;
182 const voff_t b = *(const voff_t *)key;
183
184 if (a < b)
185 return 1;
186 if (a > b)
187 return -1;
188 return 0;
189 }
190
191 const struct rb_tree_ops uvm_page_tree_ops = {
192 .rbto_compare_nodes = uvm_page_compare_nodes,
193 .rbto_compare_key = uvm_page_compare_key,
194 };
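/*
 * the tree is keyed on pg->offset: uvm_pageinsert_tree() and
 * uvm_pageremove_tree() below maintain it, and uvm_pagelookup()
 * searches it with rb_tree_find_node().  note the sign convention of
 * the comparators above: they return a positive value when the first
 * offset is the smaller of the two.
 */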
195
196 /*
197 * inline functions
198 */
199
200 /*
201 * uvm_pageinsert: insert a page in the object.
202 *
203 * => caller must lock object
204 * => caller must lock page queues
205 * => caller should have already set pg's object and offset pointers
206 * and bumped the version counter
207 */
208
209 static inline void
210 uvm_pageinsert_list(struct uvm_object *uobj, struct vm_page *pg,
211 struct vm_page *where)
212 {
213
214 KASSERT(uobj == pg->uobject);
215 KASSERT(mutex_owned(&uobj->vmobjlock));
216 KASSERT((pg->flags & PG_TABLED) == 0);
217 KASSERT(where == NULL || (where->flags & PG_TABLED));
218 KASSERT(where == NULL || (where->uobject == uobj));
219
220 if (UVM_OBJ_IS_VNODE(uobj)) {
221 if (uobj->uo_npages == 0) {
222 struct vnode *vp = (struct vnode *)uobj;
223
224 vholdl(vp);
225 }
226 if (UVM_OBJ_IS_VTEXT(uobj)) {
227 atomic_inc_uint(&uvmexp.execpages);
228 } else {
229 atomic_inc_uint(&uvmexp.filepages);
230 }
231 } else if (UVM_OBJ_IS_AOBJ(uobj)) {
232 atomic_inc_uint(&uvmexp.anonpages);
233 }
234
235 if (where)
236 TAILQ_INSERT_AFTER(&uobj->memq, where, pg, listq.queue);
237 else
238 TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
239 pg->flags |= PG_TABLED;
240 uobj->uo_npages++;
241 }
242
243
244 static inline void
245 uvm_pageinsert_tree(struct uvm_object *uobj, struct vm_page *pg)
246 {
247 bool success;
248
249 KASSERT(uobj == pg->uobject);
250 success = rb_tree_insert_node(&uobj->rb_tree, &pg->rb_node);
251 KASSERT(success);
252 }
253
254 static inline void
255 uvm_pageinsert(struct vm_page *pg)
256 {
257 struct uvm_object *uobj = pg->uobject;
258
259 uvm_pageinsert_tree(uobj, pg);
260 uvm_pageinsert_list(uobj, pg, NULL);
261 }
262
263 /*
264 * uvm_pageremove: remove a page from the object.
265 *
266 * => caller must lock object
267 * => caller must lock page queues
268 */
269
270 static inline void
271 uvm_pageremove_list(struct uvm_object *uobj, struct vm_page *pg)
272 {
273
274 KASSERT(uobj == pg->uobject);
275 KASSERT(mutex_owned(&uobj->vmobjlock));
276 KASSERT(pg->flags & PG_TABLED);
277
278 if (UVM_OBJ_IS_VNODE(uobj)) {
279 if (uobj->uo_npages == 1) {
280 struct vnode *vp = (struct vnode *)uobj;
281
282 holdrelel(vp);
283 }
284 if (UVM_OBJ_IS_VTEXT(uobj)) {
285 atomic_dec_uint(&uvmexp.execpages);
286 } else {
287 atomic_dec_uint(&uvmexp.filepages);
288 }
289 } else if (UVM_OBJ_IS_AOBJ(uobj)) {
290 atomic_dec_uint(&uvmexp.anonpages);
291 }
292
293 /* object should be locked */
294 uobj->uo_npages--;
295 TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
296 pg->flags &= ~PG_TABLED;
297 pg->uobject = NULL;
298 }
299
300 static inline void
301 uvm_pageremove_tree(struct uvm_object *uobj, struct vm_page *pg)
302 {
303
304 KASSERT(uobj == pg->uobject);
305 rb_tree_remove_node(&uobj->rb_tree, &pg->rb_node);
306 }
307
308 static inline void
309 uvm_pageremove(struct vm_page *pg)
310 {
311 struct uvm_object *uobj = pg->uobject;
312
313 uvm_pageremove_tree(uobj, pg);
314 uvm_pageremove_list(uobj, pg);
315 }
316
317 static void
318 uvm_page_init_buckets(struct pgfreelist *pgfl)
319 {
320 int color, i;
321
322 for (color = 0; color < uvmexp.ncolors; color++) {
323 for (i = 0; i < PGFL_NQUEUES; i++) {
324 LIST_INIT(&pgfl->pgfl_buckets[color].pgfl_queues[i]);
325 }
326 }
327 }
328
329 /*
330 * uvm_page_init: init the page system. called from uvm_init().
331 *
332 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
333 */
334
335 void
336 uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
337 {
338 vsize_t freepages, pagecount, bucketcount, n;
339 struct pgflbucket *bucketarray, *cpuarray;
340 struct vm_page *pagearray;
341 int lcv;
342 u_int i;
343 paddr_t paddr;
344
345 KASSERT(ncpu <= 1);
346 CTASSERT(sizeof(pagearray->offset) >= sizeof(struct uvm_cpu *));
347
348 /*
349 * init the page queues and page queue locks, except the free
350 * list; we allocate that later (with the initial vm_page
351 * structures).
352 */
353
354 curcpu()->ci_data.cpu_uvm = &uvm.cpus[0];
355 uvm_reclaim_init();
356 uvmpdpol_init();
357 mutex_init(&uvm_pageqlock, MUTEX_DRIVER, IPL_NONE);
358 mutex_init(&uvm_fpageqlock, MUTEX_DRIVER, IPL_VM);
359
360 /*
361 * allocate vm_page structures.
362 */
363
364 /*
365 * sanity check:
366 * before calling this function the MD code is expected to register
367 * some free RAM with the uvm_page_physload() function. our job
368 * now is to allocate vm_page structures for this memory.
369 */
370
371 if (vm_nphysseg == 0)
372 panic("uvm_page_bootstrap: no memory pre-allocated");
373
374 /*
375 * first calculate the number of free pages...
376 *
377 * note that we use start/end rather than avail_start/avail_end.
378 * this allows us to allocate extra vm_page structures in case we
379 * want to return some memory to the pool after booting.
380 */
381
382 freepages = 0;
383 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
384 freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);
385
386 /*
387 * Let MD code initialize the number of colors, or default
388 * to 1 color if MD code doesn't care.
389 */
390 if (uvmexp.ncolors == 0)
391 uvmexp.ncolors = 1;
392 uvmexp.colormask = uvmexp.ncolors - 1;
393
394 /*
395 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
396 * use. for each page of memory we use we need a vm_page structure.
397 * thus, the total number of pages we can use is the total size of
398 * the memory divided by the PAGE_SIZE plus the size of the vm_page
399 * structure. we add one to freepages as a fudge factor to avoid
400 * truncation errors (since we can only allocate in terms of whole
401 * pages).
402 */
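/*
 * as a rough illustration (the real sizes are port-dependent): with
 * 4 KiB pages and a struct vm_page of roughly 120 bytes, the pagecount
 * calculation below works out to about 4096/4216, i.e. roughly 97% of
 * freepages, with the other ~3% of managed memory holding the vm_page
 * array itself.
 */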
403
404 bucketcount = uvmexp.ncolors * VM_NFREELIST;
405 pagecount = ((freepages + 1) << PAGE_SHIFT) /
406 (PAGE_SIZE + sizeof(struct vm_page));
407
408 bucketarray = (void *)uvm_pageboot_alloc((bucketcount *
409 sizeof(struct pgflbucket) * 2) + (pagecount *
410 sizeof(struct vm_page)));
411 cpuarray = bucketarray + bucketcount;
412 pagearray = (struct vm_page *)(bucketarray + bucketcount * 2);
413
414 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
415 uvm.page_free[lcv].pgfl_buckets =
416 (bucketarray + (lcv * uvmexp.ncolors));
417 uvm_page_init_buckets(&uvm.page_free[lcv]);
418 uvm.cpus[0].page_free[lcv].pgfl_buckets =
419 (cpuarray + (lcv * uvmexp.ncolors));
420 uvm_page_init_buckets(&uvm.cpus[0].page_free[lcv]);
421 }
422 memset(pagearray, 0, pagecount * sizeof(struct vm_page));
423
424 /*
425 * init the vm_page structures and put them in the correct place.
426 */
427
428 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
429 n = vm_physmem[lcv].end - vm_physmem[lcv].start;
430
431 /* set up page array pointers */
432 vm_physmem[lcv].pgs = pagearray;
433 pagearray += n;
434 pagecount -= n;
435 vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);
436
437 /* init and free vm_pages (we've already zeroed them) */
438 paddr = ptoa(vm_physmem[lcv].start);
439 for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
440 vm_physmem[lcv].pgs[i].phys_addr = paddr;
441 #ifdef __HAVE_VM_PAGE_MD
442 VM_MDPAGE_INIT(&vm_physmem[lcv].pgs[i]);
443 #endif
444 if (atop(paddr) >= vm_physmem[lcv].avail_start &&
445 atop(paddr) <= vm_physmem[lcv].avail_end) {
446 uvmexp.npages++;
447 /* add page to free pool */
448 uvm_pagefree(&vm_physmem[lcv].pgs[i]);
449 }
450 }
451 }
452
453 /*
454 * pass up the values of virtual_space_start and
455 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
456 * layers of the VM.
457 */
458
459 *kvm_startp = round_page(virtual_space_start);
460 *kvm_endp = trunc_page(virtual_space_end);
461 #ifdef DEBUG
462 /*
463 * steal kva for uvm_pagezerocheck().
464 */
465 uvm_zerocheckkva = *kvm_startp;
466 *kvm_startp += PAGE_SIZE;
467 #endif /* DEBUG */
468
469 /*
470 * init various thresholds.
471 */
472
473 uvmexp.reserve_pagedaemon = 1;
474 uvmexp.reserve_kernel = vm_page_reserve_kernel;
475
476 /*
477 * determine if we should zero pages in the idle loop.
478 */
479
480 uvm.cpus[0].page_idle_zero = vm_page_zero_enable;
481
482 /*
483 * done!
484 */
485
486 uvm.page_init_done = true;
487 }
488
489 /*
490 * uvm_setpagesize: set the page size
491 *
492 * => sets page_shift and page_mask from uvmexp.pagesize.
493 */
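/*
 * for example, a port with uvmexp.pagesize = 4096 ends up with
 * uvmexp.pagemask = 0xfff and uvmexp.pageshift = 12.
 */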
494
495 void
496 uvm_setpagesize(void)
497 {
498
499 /*
500 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE
501 * to be a constant (indicated by being a non-zero value).
502 */
503 if (uvmexp.pagesize == 0) {
504 if (PAGE_SIZE == 0)
505 panic("uvm_setpagesize: uvmexp.pagesize not set");
506 uvmexp.pagesize = PAGE_SIZE;
507 }
508 uvmexp.pagemask = uvmexp.pagesize - 1;
509 if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
510 panic("uvm_setpagesize: page size not a power of two");
511 for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
512 if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
513 break;
514 }
515
516 /*
517 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
518 */
519
520 vaddr_t
521 uvm_pageboot_alloc(vsize_t size)
522 {
523 static bool initialized = false;
524 vaddr_t addr;
525 #if !defined(PMAP_STEAL_MEMORY)
526 vaddr_t vaddr;
527 paddr_t paddr;
528 #endif
529
530 /*
531 * on first call to this function, initialize ourselves.
532 */
533 if (initialized == false) {
534 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
535
536 /* round it the way we like it */
537 virtual_space_start = round_page(virtual_space_start);
538 virtual_space_end = trunc_page(virtual_space_end);
539
540 initialized = true;
541 }
542
543 /* round to page size */
544 size = round_page(size);
545
546 #if defined(PMAP_STEAL_MEMORY)
547
548 /*
549 * defer bootstrap allocation to MD code (it may want to allocate
550 * from a direct-mapped segment). pmap_steal_memory should adjust
551 * virtual_space_start/virtual_space_end if necessary.
552 */
553
554 addr = pmap_steal_memory(size, &virtual_space_start,
555 &virtual_space_end);
556
557 return(addr);
558
559 #else /* !PMAP_STEAL_MEMORY */
560
561 /*
562 * allocate virtual memory for this request
563 */
564 if (virtual_space_start == virtual_space_end ||
565 (virtual_space_end - virtual_space_start) < size)
566 panic("uvm_pageboot_alloc: out of virtual space");
567
568 addr = virtual_space_start;
569
570 #ifdef PMAP_GROWKERNEL
571 /*
572 * If the kernel pmap can't map the requested space,
573 * then allocate more resources for it.
574 */
575 if (uvm_maxkaddr < (addr + size)) {
576 uvm_maxkaddr = pmap_growkernel(addr + size);
577 if (uvm_maxkaddr < (addr + size))
578 panic("uvm_pageboot_alloc: pmap_growkernel() failed");
579 }
580 #endif
581
582 virtual_space_start += size;
583
584 /*
585 * allocate and mapin physical pages to back new virtual pages
586 */
587
588 for (vaddr = round_page(addr) ; vaddr < addr + size ;
589 vaddr += PAGE_SIZE) {
590
591 if (!uvm_page_physget(&paddr))
592 panic("uvm_pageboot_alloc: out of memory");
593
594 /*
595 * Note this memory is no longer managed, so using
596 * pmap_kenter is safe.
597 */
598 pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
599 }
600 pmap_update(pmap_kernel());
601 return(addr);
602 #endif /* PMAP_STEAL_MEMORY */
603 }
604
605 #if !defined(PMAP_STEAL_MEMORY)
606 /*
607 * uvm_page_physget: "steal" one page from the vm_physmem structure.
608 *
609 * => attempt to allocate it off the end of a segment in which the "avail"
610 * values match the start/end values. if we can't do that, then we
611 * will advance both values (making them equal, and removing some
612 * vm_page structures from the non-avail area).
613 * => return false if out of memory.
614 */
615
616 /* subroutine: try to allocate from memory chunks on the specified freelist */
617 static bool uvm_page_physget_freelist(paddr_t *, int);
618
619 static bool
620 uvm_page_physget_freelist(paddr_t *paddrp, int freelist)
621 {
622 int lcv, x;
623
624 /* pass 1: try allocating from a matching end */
625 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
626 for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
627 #else
628 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
629 #endif
630 {
631
632 if (uvm.page_init_done == true)
633 panic("uvm_page_physget: called _after_ bootstrap");
634
635 if (vm_physmem[lcv].free_list != freelist)
636 continue;
637
638 /* try from front */
639 if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
640 vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
641 *paddrp = ptoa(vm_physmem[lcv].avail_start);
642 vm_physmem[lcv].avail_start++;
643 vm_physmem[lcv].start++;
644 /* nothing left? nuke it */
645 if (vm_physmem[lcv].avail_start ==
646 vm_physmem[lcv].end) {
647 if (vm_nphysseg == 1)
648 panic("uvm_page_physget: out of memory!");
649 vm_nphysseg--;
650 for (x = lcv ; x < vm_nphysseg ; x++)
651 /* structure copy */
652 vm_physmem[x] = vm_physmem[x+1];
653 }
654 return (true);
655 }
656
657 /* try from rear */
658 if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
659 vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
660 *paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
661 vm_physmem[lcv].avail_end--;
662 vm_physmem[lcv].end--;
663 /* nothing left? nuke it */
664 if (vm_physmem[lcv].avail_end ==
665 vm_physmem[lcv].start) {
666 if (vm_nphysseg == 1)
667 panic("uvm_page_physget: out of memory!");
668 vm_nphysseg--;
669 for (x = lcv ; x < vm_nphysseg ; x++)
670 /* structure copy */
671 vm_physmem[x] = vm_physmem[x+1];
672 }
673 return (true);
674 }
675 }
676
677 /* pass2: forget about matching ends, just allocate something */
678 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
679 for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
680 #else
681 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
682 #endif
683 {
684
685 /* any room in this bank? */
686 if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
687 continue; /* nope */
688
689 *paddrp = ptoa(vm_physmem[lcv].avail_start);
690 vm_physmem[lcv].avail_start++;
691 /* truncate! */
692 vm_physmem[lcv].start = vm_physmem[lcv].avail_start;
693
694 /* nothing left? nuke it */
695 if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
696 if (vm_nphysseg == 1)
697 panic("uvm_page_physget: out of memory!");
698 vm_nphysseg--;
699 for (x = lcv ; x < vm_nphysseg ; x++)
700 /* structure copy */
701 vm_physmem[x] = vm_physmem[x+1];
702 }
703 return (true);
704 }
705
706 return (false); /* whoops! */
707 }
708
709 bool
710 uvm_page_physget(paddr_t *paddrp)
711 {
712 int i;
713
714 /* try in the order of freelist preference */
715 for (i = 0; i < VM_NFREELIST; i++)
716 if (uvm_page_physget_freelist(paddrp, i) == true)
717 return (true);
718 return (false);
719 }
720 #endif /* PMAP_STEAL_MEMORY */
721
722 /*
723 * uvm_page_physload: load physical memory into VM system
724 *
725 * => all args are PFs (page frame numbers)
726 * => all pages in start/end get vm_page structures
727 * => areas marked by avail_start/avail_end get added to the free page pool
728 * => we are limited to VM_PHYSSEG_MAX physical memory segments
729 */
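/*
 * illustrative only (the names here are hypothetical): an MD bootstrap
 * that has a RAM segment [seg_start, seg_end) of which the range
 * [avail_start, avail_end) is still unused would register it roughly as
 *
 *	uvm_page_physload(atop(seg_start), atop(seg_end),
 *	    atop(avail_start), atop(avail_end), VM_FREELIST_DEFAULT);
 *
 * note the atop() conversions: the interface takes page frame numbers,
 * not byte addresses.
 */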
730
731 void
732 uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
733 paddr_t avail_end, int free_list)
734 {
735 int preload, lcv;
736 psize_t npages;
737 struct vm_page *pgs;
738 struct vm_physseg *ps;
739
740 if (uvmexp.pagesize == 0)
741 panic("uvm_page_physload: page size not set!");
742 if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
743 panic("uvm_page_physload: bad free list %d", free_list);
744 if (start >= end)
745 panic("uvm_page_physload: start >= end");
746
747 /*
748 * do we have room?
749 */
750
751 if (vm_nphysseg == VM_PHYSSEG_MAX) {
752 printf("uvm_page_physload: unable to load physical memory "
753 "segment\n");
754 printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
755 VM_PHYSSEG_MAX, (long long)start, (long long)end);
756 printf("\tincrease VM_PHYSSEG_MAX\n");
757 return;
758 }
759
760 /*
761 * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
762 * called yet, so malloc is not available).
763 */
764
765 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
766 if (vm_physmem[lcv].pgs)
767 break;
768 }
769 preload = (lcv == vm_nphysseg);
770
771 /*
772 * if VM is already running, attempt to malloc() vm_page structures
773 */
774
775 if (!preload) {
776 #if defined(VM_PHYSSEG_NOADD)
777 panic("uvm_page_physload: tried to add RAM after vm_mem_init");
778 #else
779 /* XXXCDC: need some sort of lockout for this case */
780 paddr_t paddr;
781 npages = end - start; /* # of pages */
782 pgs = malloc(sizeof(struct vm_page) * npages,
783 M_VMPAGE, M_NOWAIT);
784 if (pgs == NULL) {
785 printf("uvm_page_physload: can not malloc vm_page "
786 "structs for segment\n");
787 printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
788 return;
789 }
790 /* zero data, init phys_addr and free_list, and free pages */
791 memset(pgs, 0, sizeof(struct vm_page) * npages);
792 for (lcv = 0, paddr = ptoa(start) ;
793 lcv < npages ; lcv++, paddr += PAGE_SIZE) {
794 pgs[lcv].phys_addr = paddr;
795 pgs[lcv].free_list = free_list;
796 if (atop(paddr) >= avail_start &&
797 atop(paddr) <= avail_end)
798 uvm_pagefree(&pgs[lcv]);
799 }
800 /* XXXCDC: incomplete: need to update uvmexp.free, what else? */
801 /* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
802 #endif
803 } else {
804 pgs = NULL;
805 npages = 0;
806 }
807
808 /*
809 * now insert us in the proper place in vm_physmem[]
810 */
811
812 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
813 /* random: put it at the end (easy!) */
814 ps = &vm_physmem[vm_nphysseg];
815 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
816 {
817 int x;
818 /* sort by address for binary search */
819 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
820 if (start < vm_physmem[lcv].start)
821 break;
822 ps = &vm_physmem[lcv];
823 /* move back other entries, if necessary ... */
824 for (x = vm_nphysseg ; x > lcv ; x--)
825 /* structure copy */
826 vm_physmem[x] = vm_physmem[x - 1];
827 }
828 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
829 {
830 int x;
831 /* sort by largest segment first */
832 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
833 if ((end - start) >
834 (vm_physmem[lcv].end - vm_physmem[lcv].start))
835 break;
836 ps = &vm_physmem[lcv];
837 /* move back other entries, if necessary ... */
838 for (x = vm_nphysseg ; x > lcv ; x--)
839 /* structure copy */
840 vm_physmem[x] = vm_physmem[x - 1];
841 }
842 #else
843 panic("uvm_page_physload: unknown physseg strategy selected!");
844 #endif
845
846 ps->start = start;
847 ps->end = end;
848 ps->avail_start = avail_start;
849 ps->avail_end = avail_end;
850 if (preload) {
851 ps->pgs = NULL;
852 } else {
853 ps->pgs = pgs;
854 ps->lastpg = pgs + npages - 1;
855 }
856 ps->free_list = free_list;
857 vm_nphysseg++;
858
859 if (!preload) {
860 uvmpdpol_reinit();
861 }
862 }
863
864 /*
865 * uvm_page_recolor: Recolor the pages if the new bucket count is
866 * larger than the old one.
867 */
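/*
 * the replacement bucket storage is a single malloc()ed array holding
 * both the global set and a per-CPU set (hence the "* 2" below); free
 * pages are then moved from the old buckets into the bucket selected
 * by VM_PGCOLOR_BUCKET() under the new color mask.
 */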
868
869 void
870 uvm_page_recolor(int newncolors)
871 {
872 struct pgflbucket *bucketarray, *cpuarray, *oldbucketarray;
873 struct pgfreelist gpgfl, pgfl;
874 struct vm_page *pg;
875 vsize_t bucketcount;
876 int lcv, color, i, ocolors;
877 struct uvm_cpu *ucpu;
878
879 if (newncolors <= uvmexp.ncolors)
880 return;
881
882 if (uvm.page_init_done == false) {
883 uvmexp.ncolors = newncolors;
884 return;
885 }
886
887 bucketcount = newncolors * VM_NFREELIST;
888 bucketarray = malloc(bucketcount * sizeof(struct pgflbucket) * 2,
889 M_VMPAGE, M_NOWAIT);
890 cpuarray = bucketarray + bucketcount;
891 if (bucketarray == NULL) {
892 printf("WARNING: unable to allocate %ld page color buckets\n",
893 (long) bucketcount);
894 return;
895 }
896
897 mutex_spin_enter(&uvm_fpageqlock);
898
899 /* Make sure we should still do this. */
900 if (newncolors <= uvmexp.ncolors) {
901 mutex_spin_exit(&uvm_fpageqlock);
902 free(bucketarray, M_VMPAGE);
903 return;
904 }
905
906 oldbucketarray = uvm.page_free[0].pgfl_buckets;
907 ocolors = uvmexp.ncolors;
908
909 uvmexp.ncolors = newncolors;
910 uvmexp.colormask = uvmexp.ncolors - 1;
911
912 ucpu = curcpu()->ci_data.cpu_uvm;
913 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
914 gpgfl.pgfl_buckets = (bucketarray + (lcv * newncolors));
915 pgfl.pgfl_buckets = (cpuarray + (lcv * uvmexp.ncolors));
916 uvm_page_init_buckets(&gpgfl);
917 uvm_page_init_buckets(&pgfl);
918 for (color = 0; color < ocolors; color++) {
919 for (i = 0; i < PGFL_NQUEUES; i++) {
920 while ((pg = LIST_FIRST(&uvm.page_free[
921 lcv].pgfl_buckets[color].pgfl_queues[i]))
922 != NULL) {
923 LIST_REMOVE(pg, pageq.list); /* global */
924 LIST_REMOVE(pg, listq.list); /* cpu */
925 LIST_INSERT_HEAD(&gpgfl.pgfl_buckets[
926 VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
927 i], pg, pageq.list);
928 LIST_INSERT_HEAD(&pgfl.pgfl_buckets[
929 VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
930 i], pg, listq.list);
931 }
932 }
933 }
934 uvm.page_free[lcv].pgfl_buckets = gpgfl.pgfl_buckets;
935 ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
936 }
937
938 if (have_recolored_pages) {
939 mutex_spin_exit(&uvm_fpageqlock);
940 free(oldbucketarray, M_VMPAGE);
941 return;
942 }
943
944 have_recolored_pages = true;
945 mutex_spin_exit(&uvm_fpageqlock);
946 }
947
948 /*
949 * uvm_cpu_attach: initialize per-CPU data structures.
950 */
951
952 void
953 uvm_cpu_attach(struct cpu_info *ci)
954 {
955 struct pgflbucket *bucketarray;
956 struct pgfreelist pgfl;
957 struct uvm_cpu *ucpu;
958 vsize_t bucketcount;
959 int lcv;
960
961 if (CPU_IS_PRIMARY(ci)) {
962 /* Already done in uvm_page_init(). */
963 return;
964 }
965
966 /* Add more reserve pages for this CPU. */
967 uvmexp.reserve_kernel += vm_page_reserve_kernel;
968
969 /* Configure this CPU's free lists. */
970 bucketcount = uvmexp.ncolors * VM_NFREELIST;
971 bucketarray = malloc(bucketcount * sizeof(struct pgflbucket),
972 M_VMPAGE, M_WAITOK);
973 ucpu = &uvm.cpus[cpu_index(ci)];
974 ci->ci_data.cpu_uvm = ucpu;
975 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
976 pgfl.pgfl_buckets = (bucketarray + (lcv * uvmexp.ncolors));
977 uvm_page_init_buckets(&pgfl);
978 ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
979 }
980 }
981
982 /*
983 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat
984 */
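/*
 * for each color, starting at *trycolorp, the search order is: the
 * calling CPU's bucket for the "try1" queue, the global bucket for
 * "try1", then the same two buckets for "try2".  if all four are empty
 * the next color is tried.  if a different color had to be used, it is
 * passed back through trycolorp.
 */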
985
986 static struct vm_page *
987 uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int flist, int try1, int try2,
988 int *trycolorp)
989 {
990 struct pgflist *freeq;
991 struct vm_page *pg;
992 int color, trycolor = *trycolorp;
993 struct pgfreelist *gpgfl, *pgfl;
994
995 KASSERT(mutex_owned(&uvm_fpageqlock));
996
997 color = trycolor;
998 pgfl = &ucpu->page_free[flist];
999 gpgfl = &uvm.page_free[flist];
1000 do {
1001 /* cpu, try1 */
1002 if ((pg = LIST_FIRST((freeq =
1003 &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) {
1004 VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--;
1005 uvmexp.cpuhit++;
1006 goto gotit;
1007 }
1008 /* global, try1 */
1009 if ((pg = LIST_FIRST((freeq =
1010 &gpgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) {
1011 VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--;
1012 uvmexp.cpumiss++;
1013 goto gotit;
1014 }
1015 /* cpu, try2 */
1016 if ((pg = LIST_FIRST((freeq =
1017 &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) {
1018 VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--;
1019 uvmexp.cpuhit++;
1020 goto gotit;
1021 }
1022 /* global, try2 */
1023 if ((pg = LIST_FIRST((freeq =
1024 &gpgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) {
1025 VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--;
1026 uvmexp.cpumiss++;
1027 goto gotit;
1028 }
1029 color = (color + 1) & uvmexp.colormask;
1030 } while (color != trycolor);
1031
1032 return (NULL);
1033
1034 gotit:
1035 LIST_REMOVE(pg, pageq.list); /* global list */
1036 LIST_REMOVE(pg, listq.list); /* per-cpu list */
1037 uvmexp.free--;
1038
1039 /* update zero'd page count */
1040 if (pg->flags & PG_ZERO)
1041 uvmexp.zeropages--;
1042
1043 if (color == trycolor)
1044 uvmexp.colorhit++;
1045 else {
1046 uvmexp.colormiss++;
1047 *trycolorp = color;
1048 }
1049
1050 return (pg);
1051 }
1052
1053 /*
1054 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
1055 *
1056 * => return null if no pages free
1057 * => wake up pagedaemon if number of free pages drops below low water mark
1058 * => if obj != NULL, obj must be locked (to put in obj's tree)
1059 * => if anon != NULL, anon must be locked (to put in anon)
1060 * => only one of obj or anon can be non-null
1061 * => caller must activate/deactivate page if it is not wired.
1062 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
1063 * => policy decision: it is more important to pull a page off of the
1064 * appropriate priority free list than it is to get a zero'd or
1065 * unknown contents page. This is because we live with the
1066 * consequences of a bad free list decision for the entire
1067 * lifetime of the page, e.g. if the page comes from memory that
1068 * is slower to access.
1069 */
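/*
 * (most callers do not use this function directly: uvm_pagealloc(),
 * defined in uvm_extern.h, is expected to be a thin wrapper passing
 * UVM_PGA_STRAT_NORMAL and a free_list of 0.)
 */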
1070
1071 struct vm_page *
1072 uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
1073 int flags, int strat, int free_list)
1074 {
1075 int lcv, try1, try2, zeroit = 0, color;
1076 struct uvm_cpu *ucpu;
1077 struct vm_page *pg;
1078 lwp_t *l;
1079
1080 KASSERT(obj == NULL || anon == NULL);
1081 KASSERT(anon == NULL || off == 0);
1082 KASSERT(off == trunc_page(off));
1083 KASSERT(obj == NULL || mutex_owned(&obj->vmobjlock));
1084 KASSERT(anon == NULL || mutex_owned(&anon->an_lock));
1085
1086 mutex_spin_enter(&uvm_fpageqlock);
1087
1088 /*
1089 * This implements a global round-robin page coloring
1090 * algorithm.
1091 *
1092 * XXXJRT: What about virtually-indexed caches?
1093 */
1094
1095 ucpu = curcpu()->ci_data.cpu_uvm;
1096 color = ucpu->page_free_nextcolor;
1097
1098 /*
1099 * check to see if we need to generate some free pages by waking
1100 * the pagedaemon.
1101 */
1102
1103 uvm_kick_pdaemon();
1104
1105 /*
1106 * fail if any of these conditions is true:
1107 * [1] there really are no free pages, or
1108 * [2] only kernel "reserved" pages remain and
1109 * reserved pages have not been requested.
1110 * [3] only pagedaemon "reserved" pages remain and
1111 * the requestor isn't the pagedaemon.
1112 * we make kernel reserve pages available if called by a
1113 * kernel thread or a realtime thread.
1114 */
1115 l = curlwp;
1116 if (__predict_true(l != NULL) && lwp_eprio(l) >= PRI_KTHREAD) {
1117 flags |= UVM_PGA_USERESERVE;
1118 }
1119 if ((uvmexp.free <= uvmexp.reserve_kernel &&
1120 (flags & UVM_PGA_USERESERVE) == 0) ||
1121 (uvmexp.free <= uvmexp.reserve_pagedaemon &&
1122 curlwp != uvm.pagedaemon_lwp))
1123 goto fail;
1124
1125 #if PGFL_NQUEUES != 2
1126 #error uvm_pagealloc_strat needs to be updated
1127 #endif
1128
1129 /*
1130 * If we want a zero'd page, try the ZEROS queue first, otherwise
1131 * we try the UNKNOWN queue first.
1132 */
1133 if (flags & UVM_PGA_ZERO) {
1134 try1 = PGFL_ZEROS;
1135 try2 = PGFL_UNKNOWN;
1136 } else {
1137 try1 = PGFL_UNKNOWN;
1138 try2 = PGFL_ZEROS;
1139 }
1140
1141 again:
1142 switch (strat) {
1143 case UVM_PGA_STRAT_NORMAL:
1144 /* Check freelists: descending priority (ascending id) order */
1145 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
1146 pg = uvm_pagealloc_pgfl(ucpu, lcv,
1147 try1, try2, &color);
1148 if (pg != NULL)
1149 goto gotit;
1150 }
1151
1152 /* No pages free! */
1153 goto fail;
1154
1155 case UVM_PGA_STRAT_ONLY:
1156 case UVM_PGA_STRAT_FALLBACK:
1157 /* Attempt to allocate from the specified free list. */
1158 KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
1159 pg = uvm_pagealloc_pgfl(ucpu, free_list,
1160 try1, try2, &color);
1161 if (pg != NULL)
1162 goto gotit;
1163
1164 /* Fall back, if possible. */
1165 if (strat == UVM_PGA_STRAT_FALLBACK) {
1166 strat = UVM_PGA_STRAT_NORMAL;
1167 goto again;
1168 }
1169
1170 /* No pages free! */
1171 goto fail;
1172
1173 default:
1174 panic("uvm_pagealloc_strat: bad strat %d", strat);
1175 /* NOTREACHED */
1176 }
1177
1178 gotit:
1179 /*
1180 * We now know which color we actually allocated from; set
1181 * the next color accordingly.
1182 */
1183
1184 ucpu->page_free_nextcolor = (color + 1) & uvmexp.colormask;
1185
1186 /*
1187 * update allocation statistics and remember if we have to
1188 * zero the page
1189 */
1190
1191 if (flags & UVM_PGA_ZERO) {
1192 if (pg->flags & PG_ZERO) {
1193 uvmexp.pga_zerohit++;
1194 zeroit = 0;
1195 } else {
1196 uvmexp.pga_zeromiss++;
1197 zeroit = 1;
1198 }
1199 if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) {
1200 ucpu->page_idle_zero = vm_page_zero_enable;
1201 }
1202 }
1203 KASSERT(pg->pqflags == PQ_FREE);
1204
1205 pg->offset = off;
1206 pg->uobject = obj;
1207 pg->uanon = anon;
1208 pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
1209 if (anon) {
1210 anon->an_page = pg;
1211 pg->pqflags = PQ_ANON;
1212 atomic_inc_uint(&uvmexp.anonpages);
1213 } else {
1214 if (obj) {
1215 uvm_pageinsert(pg);
1216 }
1217 pg->pqflags = 0;
1218 }
1219 mutex_spin_exit(&uvm_fpageqlock);
1220
1221 #if defined(UVM_PAGE_TRKOWN)
1222 pg->owner_tag = NULL;
1223 #endif
1224 UVM_PAGE_OWN(pg, "new alloc");
1225
1226 if (flags & UVM_PGA_ZERO) {
1227 /*
1228 * A zero'd page is not clean. If we got a page not already
1229 * zero'd, then we have to zero it ourselves.
1230 */
1231 pg->flags &= ~PG_CLEAN;
1232 if (zeroit)
1233 pmap_zero_page(VM_PAGE_TO_PHYS(pg));
1234 }
1235
1236 return(pg);
1237
1238 fail:
1239 mutex_spin_exit(&uvm_fpageqlock);
1240 return (NULL);
1241 }
1242
1243 /*
1244 * uvm_pagereplace: replace a page with another
1245 *
1246 * => object must be locked
1247 */
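/*
 * note the ordering below: newpg is linked into the object's memq
 * immediately after oldpg before oldpg is removed, so the replacement
 * takes over oldpg's position in the list as well as its offset.
 */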
1248
1249 void
1250 uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg)
1251 {
1252 struct uvm_object *uobj = oldpg->uobject;
1253
1254 KASSERT((oldpg->flags & PG_TABLED) != 0);
1255 KASSERT(uobj != NULL);
1256 KASSERT((newpg->flags & PG_TABLED) == 0);
1257 KASSERT(newpg->uobject == NULL);
1258 KASSERT(mutex_owned(&uobj->vmobjlock));
1259
1260 newpg->uobject = uobj;
1261 newpg->offset = oldpg->offset;
1262
1263 uvm_pageremove_tree(uobj, oldpg);
1264 uvm_pageinsert_tree(uobj, newpg);
1265 uvm_pageinsert_list(uobj, newpg, oldpg);
1266 uvm_pageremove_list(uobj, oldpg);
1267 }
1268
1269 /*
1270 * uvm_pagerealloc: reallocate a page from one object to another
1271 *
1272 * => both objects must be locked
1273 */
1274
1275 void
1276 uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
1277 {
1278 /*
1279 * remove it from the old object
1280 */
1281
1282 if (pg->uobject) {
1283 uvm_pageremove(pg);
1284 }
1285
1286 /*
1287 * put it in the new object
1288 */
1289
1290 if (newobj) {
1291 pg->uobject = newobj;
1292 pg->offset = newoff;
1293 uvm_pageinsert(pg);
1294 }
1295 }
1296
1297 #ifdef DEBUG
1298 /*
1299 * check if page is zero-filled
1300 *
1301 * - called with free page queue lock held.
1302 */
1303 void
1304 uvm_pagezerocheck(struct vm_page *pg)
1305 {
1306 int *p, *ep;
1307
1308 KASSERT(uvm_zerocheckkva != 0);
1309 KASSERT(mutex_owned(&uvm_fpageqlock));
1310
1311 /*
1312 * XXX assuming pmap_kenter_pa and pmap_kremove never call
1313 * uvm page allocator.
1314 *
1315 * it might be better to have "CPU-local temporary map" pmap interface.
1316 */
1317 pmap_kenter_pa(uvm_zerocheckkva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ);
1318 p = (int *)uvm_zerocheckkva;
1319 ep = (int *)((char *)p + PAGE_SIZE);
1320 pmap_update(pmap_kernel());
1321 while (p < ep) {
1322 if (*p != 0)
1323 panic("PG_ZERO page isn't zero-filled");
1324 p++;
1325 }
1326 pmap_kremove(uvm_zerocheckkva, PAGE_SIZE);
1327 /*
1328 * pmap_update() is not necessary here because no one except us
1329 * uses this VA.
1330 */
1331 }
1332 #endif /* DEBUG */
1333
1334 /*
1335 * uvm_pagefree: free page
1336 *
1337 * => erase page's identity (i.e. remove from object)
1338 * => put page on free list
1339 * => caller must lock owning object (either anon or uvm_object)
1340 * => caller must lock page queues
1341 * => assumes all valid mappings of pg are gone
1342 */
1343
1344 void
1345 uvm_pagefree(struct vm_page *pg)
1346 {
1347 struct pgflist *pgfl;
1348 struct uvm_cpu *ucpu;
1349 int index, color, queue;
1350 bool iszero;
1351
1352 #ifdef DEBUG
1353 if (pg->uobject == (void *)0xdeadbeef &&
1354 pg->uanon == (void *)0xdeadbeef) {
1355 panic("uvm_pagefree: freeing free page %p", pg);
1356 }
1357 #endif /* DEBUG */
1358
1359 KASSERT((pg->flags & PG_PAGEOUT) == 0);
1360 KASSERT(!(pg->pqflags & PQ_FREE));
1361 KASSERT(mutex_owned(&uvm_pageqlock) || !uvmpdpol_pageisqueued_p(pg));
1362 KASSERT(pg->uobject == NULL || mutex_owned(&pg->uobject->vmobjlock));
1363 KASSERT(pg->uobject != NULL || pg->uanon == NULL ||
1364 mutex_owned(&pg->uanon->an_lock));
1365
1366 /*
1367 * if the page is loaned, resolve the loan instead of freeing.
1368 */
1369
1370 if (pg->loan_count) {
1371 KASSERT(pg->wire_count == 0);
1372
1373 /*
1374 * if the page is owned by an anon then we just want to
1375 * drop anon ownership. the kernel will free the page when
1376 * it is done with it. if the page is owned by an object,
1377 * remove it from the object and mark it dirty for the benefit
1378 * of possible anon owners.
1379 *
1380 * regardless of previous ownership, wakeup any waiters,
1381 * unbusy the page, and we're done.
1382 */
1383
1384 if (pg->uobject != NULL) {
1385 uvm_pageremove(pg);
1386 pg->flags &= ~PG_CLEAN;
1387 } else if (pg->uanon != NULL) {
1388 if ((pg->pqflags & PQ_ANON) == 0) {
1389 pg->loan_count--;
1390 } else {
1391 pg->pqflags &= ~PQ_ANON;
1392 atomic_dec_uint(&uvmexp.anonpages);
1393 }
1394 pg->uanon->an_page = NULL;
1395 pg->uanon = NULL;
1396 }
1397 if (pg->flags & PG_WANTED) {
1398 wakeup(pg);
1399 }
1400 pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1);
1401 #ifdef UVM_PAGE_TRKOWN
1402 pg->owner_tag = NULL;
1403 #endif
1404 if (pg->loan_count) {
1405 KASSERT(pg->uobject == NULL);
1406 if (pg->uanon == NULL) {
1407 uvm_pagedequeue(pg);
1408 }
1409 return;
1410 }
1411 }
1412
1413 /*
1414 * remove page from its object or anon.
1415 */
1416
1417 if (pg->uobject != NULL) {
1418 uvm_pageremove(pg);
1419 } else if (pg->uanon != NULL) {
1420 pg->uanon->an_page = NULL;
1421 atomic_dec_uint(&uvmexp.anonpages);
1422 }
1423
1424 /*
1425 * now remove the page from the queues.
1426 */
1427
1428 uvm_pagedequeue(pg);
1429
1430 /*
1431 * if the page was wired, unwire it now.
1432 */
1433
1434 if (pg->wire_count) {
1435 pg->wire_count = 0;
1436 uvmexp.wired--;
1437 }
1438
1439 /*
1440 * and put on free queue
1441 */
1442
1443 iszero = (pg->flags & PG_ZERO);
1444 index = uvm_page_lookup_freelist(pg);
1445 color = VM_PGCOLOR_BUCKET(pg);
1446 queue = (iszero ? PGFL_ZEROS : PGFL_UNKNOWN);
1447
1448 #ifdef DEBUG
1449 pg->uobject = (void *)0xdeadbeef;
1450 pg->uanon = (void *)0xdeadbeef;
1451 #endif
1452
1453 mutex_spin_enter(&uvm_fpageqlock);
1454 pg->pqflags = PQ_FREE;
1455
1456 #ifdef DEBUG
1457 if (iszero)
1458 uvm_pagezerocheck(pg);
1459 #endif /* DEBUG */
1460
1461
1462 /* global list */
1463 pgfl = &uvm.page_free[index].pgfl_buckets[color].pgfl_queues[queue];
1464 LIST_INSERT_HEAD(pgfl, pg, pageq.list);
1465 uvmexp.free++;
1466 if (iszero) {
1467 uvmexp.zeropages++;
1468 }
1469
1470 /* per-cpu list */
1471 ucpu = curcpu()->ci_data.cpu_uvm;
1472 pg->offset = (uintptr_t)ucpu;
1473 pgfl = &ucpu->page_free[index].pgfl_buckets[color].pgfl_queues[queue];
1474 LIST_INSERT_HEAD(pgfl, pg, listq.list);
1475 ucpu->pages[queue]++;
1476 if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) {
1477 ucpu->page_idle_zero = vm_page_zero_enable;
1478 }
1479
1480 mutex_spin_exit(&uvm_fpageqlock);
1481 }
1482
1483 /*
1484 * uvm_page_unbusy: unbusy an array of pages.
1485 *
1486 * => pages must either all belong to the same object, or all belong to anons.
1487 * => if pages are object-owned, object must be locked.
1488 * => if pages are anon-owned, anons must be locked.
1489 * => caller must lock page queues if pages may be released.
1490 * => caller must make sure that anon-owned pages are not PG_RELEASED.
1491 */
1492
1493 void
1494 uvm_page_unbusy(struct vm_page **pgs, int npgs)
1495 {
1496 struct vm_page *pg;
1497 int i;
1498 UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);
1499
1500 for (i = 0; i < npgs; i++) {
1501 pg = pgs[i];
1502 if (pg == NULL || pg == PGO_DONTCARE) {
1503 continue;
1504 }
1505
1506 KASSERT(pg->uobject == NULL ||
1507 mutex_owned(&pg->uobject->vmobjlock));
1508 KASSERT(pg->uobject != NULL ||
1509 (pg->uanon != NULL && mutex_owned(&pg->uanon->an_lock)));
1510
1511 KASSERT(pg->flags & PG_BUSY);
1512 KASSERT((pg->flags & PG_PAGEOUT) == 0);
1513 if (pg->flags & PG_WANTED) {
1514 wakeup(pg);
1515 }
1516 if (pg->flags & PG_RELEASED) {
1517 UVMHIST_LOG(ubchist, "releasing pg %p", pg,0,0,0);
1518 KASSERT(pg->uobject != NULL ||
1519 (pg->uanon != NULL && pg->uanon->an_ref > 0));
1520 pg->flags &= ~PG_RELEASED;
1521 uvm_pagefree(pg);
1522 } else {
1523 UVMHIST_LOG(ubchist, "unbusying pg %p", pg,0,0,0);
1524 KASSERT((pg->flags & PG_FAKE) == 0);
1525 pg->flags &= ~(PG_WANTED|PG_BUSY);
1526 UVM_PAGE_OWN(pg, NULL);
1527 }
1528 }
1529 }
1530
1531 #if defined(UVM_PAGE_TRKOWN)
1532 /*
1533 * uvm_page_own: set or release page ownership
1534 *
1535 * => this is a debugging function that keeps track of who sets PG_BUSY
1536 * and where they do it. it can be used to track down problems
1537 * such as a process setting "PG_BUSY" and never releasing it.
1538 * => page's object [if any] must be locked
1539 * => if "tag" is NULL then we are releasing page ownership
1540 */
1541 void
1542 uvm_page_own(struct vm_page *pg, const char *tag)
1543 {
1544 struct uvm_object *uobj;
1545 struct vm_anon *anon;
1546
1547 KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);
1548
1549 uobj = pg->uobject;
1550 anon = pg->uanon;
1551 if (uobj != NULL) {
1552 KASSERT(mutex_owned(&uobj->vmobjlock));
1553 } else if (anon != NULL) {
1554 KASSERT(mutex_owned(&anon->an_lock));
1555 }
1556
1557 KASSERT((pg->flags & PG_WANTED) == 0);
1558
1559 /* gain ownership? */
1560 if (tag) {
1561 KASSERT((pg->flags & PG_BUSY) != 0);
1562 if (pg->owner_tag) {
1563 printf("uvm_page_own: page %p already owned "
1564 "by proc %d [%s]\n", pg,
1565 pg->owner, pg->owner_tag);
1566 panic("uvm_page_own");
1567 }
1568 pg->owner = (curproc) ? curproc->p_pid : (pid_t) -1;
1569 pg->lowner = (curlwp) ? curlwp->l_lid : (lwpid_t) -1;
1570 pg->owner_tag = tag;
1571 return;
1572 }
1573
1574 /* drop ownership */
1575 KASSERT((pg->flags & PG_BUSY) == 0);
1576 if (pg->owner_tag == NULL) {
1577 printf("uvm_page_own: dropping ownership of a non-owned "
1578 "page (%p)\n", pg);
1579 panic("uvm_page_own");
1580 }
1581 if (!uvmpdpol_pageisqueued_p(pg)) {
1582 KASSERT((pg->uanon == NULL && pg->uobject == NULL) ||
1583 pg->wire_count > 0);
1584 } else {
1585 KASSERT(pg->wire_count == 0);
1586 }
1587 pg->owner_tag = NULL;
1588 }
1589 #endif
1590
1591 /*
1592 * uvm_pageidlezero: zero free pages while the system is idle.
1593 *
1594 * => try to complete one color bucket at a time, to reduce our impact
1595 * on the CPU cache.
1596 * => we loop until we either reach the target or there is a lwp ready
1597 * to run, or MD code detects a reason to break early.
1598 */
1599 void
1600 uvm_pageidlezero(void)
1601 {
1602 struct vm_page *pg;
1603 struct pgfreelist *pgfl, *gpgfl;
1604 struct uvm_cpu *ucpu;
1605 int free_list, firstbucket, nextbucket;
1606
1607 ucpu = curcpu()->ci_data.cpu_uvm;
1608 if (!ucpu->page_idle_zero ||
1609 ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {
1610 ucpu->page_idle_zero = false;
1611 return;
1612 }
1613 mutex_enter(&uvm_fpageqlock);
1614 firstbucket = ucpu->page_free_nextcolor;
1615 nextbucket = firstbucket;
1616 do {
1617 for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
1618 if (sched_curcpu_runnable_p()) {
1619 goto quit;
1620 }
1621 pgfl = &ucpu->page_free[free_list];
1622 gpgfl = &uvm.page_free[free_list];
1623 while ((pg = LIST_FIRST(&pgfl->pgfl_buckets[
1624 nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) {
1625 if (sched_curcpu_runnable_p()) {
1626 goto quit;
1627 }
1628 LIST_REMOVE(pg, pageq.list); /* global list */
1629 LIST_REMOVE(pg, listq.list); /* per-cpu list */
1630 ucpu->pages[PGFL_UNKNOWN]--;
1631 uvmexp.free--;
1632 KASSERT(pg->pqflags == PQ_FREE);
1633 pg->pqflags = 0;
1634 mutex_spin_exit(&uvm_fpageqlock);
1635 #ifdef PMAP_PAGEIDLEZERO
1636 if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) {
1637
1638 /*
1639 * The machine-dependent code detected
1640 * some reason for us to abort zeroing
1641 * pages, probably because there is a
1642 * process now ready to run.
1643 */
1644
1645 mutex_spin_enter(&uvm_fpageqlock);
1646 pg->pqflags = PQ_FREE;
1647 LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[
1648 nextbucket].pgfl_queues[
1649 PGFL_UNKNOWN], pg, pageq.list);
1650 LIST_INSERT_HEAD(&pgfl->pgfl_buckets[
1651 nextbucket].pgfl_queues[
1652 PGFL_UNKNOWN], pg, listq.list);
1653 ucpu->pages[PGFL_UNKNOWN]++;
1654 uvmexp.free++;
1655 uvmexp.zeroaborts++;
1656 goto quit;
1657 }
1658 #else
1659 pmap_zero_page(VM_PAGE_TO_PHYS(pg));
1660 #endif /* PMAP_PAGEIDLEZERO */
1661 pg->flags |= PG_ZERO;
1662
1663 mutex_spin_enter(&uvm_fpageqlock);
1664 pg->pqflags = PQ_FREE;
1665 LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[
1666 nextbucket].pgfl_queues[PGFL_ZEROS],
1667 pg, pageq.list);
1668 LIST_INSERT_HEAD(&pgfl->pgfl_buckets[
1669 nextbucket].pgfl_queues[PGFL_ZEROS],
1670 pg, listq.list);
1671 ucpu->pages[PGFL_ZEROS]++;
1672 uvmexp.free++;
1673 uvmexp.zeropages++;
1674 }
1675 }
1676 if (ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {
1677 break;
1678 }
1679 nextbucket = (nextbucket + 1) & uvmexp.colormask;
1680 } while (nextbucket != firstbucket);
1681 ucpu->page_idle_zero = false;
1682 quit:
1683 mutex_spin_exit(&uvm_fpageqlock);
1684 }
1685
1686 /*
1687 * uvm_pagelookup: look up a page
1688 *
1689 * => caller should lock object to keep someone from pulling the page
1690 * out from under it
1691 */
1692
1693 struct vm_page *
1694 uvm_pagelookup(struct uvm_object *obj, voff_t off)
1695 {
1696 struct vm_page *pg;
1697
1698 KASSERT(mutex_owned(&obj->vmobjlock));
1699
1700 pg = (struct vm_page *)rb_tree_find_node(&obj->rb_tree, &off);
1701
1702 KASSERT(pg == NULL || obj->uo_npages != 0);
1703 KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
1704 (pg->flags & PG_BUSY) != 0);
1705 return(pg);
1706 }
1707
1708 /*
1709 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
1710 *
1711 * => caller must lock page queues
1712 */
1713
1714 void
1715 uvm_pagewire(struct vm_page *pg)
1716 {
1717 KASSERT(mutex_owned(&uvm_pageqlock));
1718 #if defined(READAHEAD_STATS)
1719 if ((pg->pqflags & PQ_READAHEAD) != 0) {
1720 uvm_ra_hit.ev_count++;
1721 pg->pqflags &= ~PQ_READAHEAD;
1722 }
1723 #endif /* defined(READAHEAD_STATS) */
1724 if (pg->wire_count == 0) {
1725 uvm_pagedequeue(pg);
1726 uvmexp.wired++;
1727 }
1728 pg->wire_count++;
1729 }
1730
1731 /*
1732 * uvm_pageunwire: unwire the page.
1733 *
1734 * => activate if wire count goes to zero.
1735 * => caller must lock page queues
1736 */
1737
1738 void
1739 uvm_pageunwire(struct vm_page *pg)
1740 {
1741 KASSERT(mutex_owned(&uvm_pageqlock));
1742 pg->wire_count--;
1743 if (pg->wire_count == 0) {
1744 uvm_pageactivate(pg);
1745 uvmexp.wired--;
1746 }
1747 }
1748
1749 /*
1750 * uvm_pagedeactivate: deactivate page
1751 *
1752 * => caller must lock page queues
1753 * => caller must check to make sure page is not wired
1754 * => object that page belongs to must be locked (so we can adjust pg->flags)
1755 * => caller must clear the reference on the page before calling
1756 */
1757
1758 void
1759 uvm_pagedeactivate(struct vm_page *pg)
1760 {
1761
1762 KASSERT(mutex_owned(&uvm_pageqlock));
1763 KASSERT(pg->wire_count != 0 || uvmpdpol_pageisqueued_p(pg));
1764 uvmpdpol_pagedeactivate(pg);
1765 }
1766
1767 /*
1768 * uvm_pageactivate: activate page
1769 *
1770 * => caller must lock page queues
1771 */
1772
1773 void
1774 uvm_pageactivate(struct vm_page *pg)
1775 {
1776
1777 KASSERT(mutex_owned(&uvm_pageqlock));
1778 #if defined(READAHEAD_STATS)
1779 if ((pg->pqflags & PQ_READAHEAD) != 0) {
1780 uvm_ra_hit.ev_count++;
1781 pg->pqflags &= ~PQ_READAHEAD;
1782 }
1783 #endif /* defined(READAHEAD_STATS) */
1784 if (pg->wire_count != 0) {
1785 return;
1786 }
1787 uvmpdpol_pageactivate(pg);
1788 }
1789
1790 /*
1791 * uvm_pagedequeue: remove a page from any paging queue
1792 */
1793
1794 void
1795 uvm_pagedequeue(struct vm_page *pg)
1796 {
1797
1798 if (uvmpdpol_pageisqueued_p(pg)) {
1799 KASSERT(mutex_owned(&uvm_pageqlock));
1800 }
1801
1802 uvmpdpol_pagedequeue(pg);
1803 }
1804
1805 /*
1806 * uvm_pageenqueue: add a page to a paging queue without activating.
1807 * used where a page is not really demanded (yet), e.g. read-ahead.
1808 */
1809
1810 void
1811 uvm_pageenqueue(struct vm_page *pg)
1812 {
1813
1814 KASSERT(mutex_owned(&uvm_pageqlock));
1815 if (pg->wire_count != 0) {
1816 return;
1817 }
1818 uvmpdpol_pageenqueue(pg);
1819 }
1820
1821 /*
1822 * uvm_pagezero: zero fill a page
1823 *
1824 * => if page is part of an object then the object should be locked
1825 * to protect pg->flags.
1826 */
1827
1828 void
1829 uvm_pagezero(struct vm_page *pg)
1830 {
1831 pg->flags &= ~PG_CLEAN;
1832 pmap_zero_page(VM_PAGE_TO_PHYS(pg));
1833 }
1834
1835 /*
1836 * uvm_pagecopy: copy a page
1837 *
1838 * => if page is part of an object then the object should be locked
1839 * to protect pg->flags.
1840 */
1841
1842 void
1843 uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
1844 {
1845
1846 dst->flags &= ~PG_CLEAN;
1847 pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
1848 }
1849
1850 /*
1851 * uvm_page_lookup_freelist: look up the free list for the specified page
1852 */
1853
1854 int
1855 uvm_page_lookup_freelist(struct vm_page *pg)
1856 {
1857 int lcv;
1858
1859 lcv = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
1860 KASSERT(lcv != -1);
1861 return (vm_physmem[lcv].free_list);
1862 }
1863