/*	$NetBSD: uvm_page.c,v 1.111 2006/02/12 09:19:27 yamt Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Charles D. Cranor,
 *	Washington University, the University of California, Berkeley and
 *	its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_page.c   8.3 (Berkeley) 3/21/94
 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *	Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 *	School of Computer Science
 *	Carnegie Mellon University
 *	Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_page.c: page ops.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.111 2006/02/12 09:19:27 yamt Exp $");

#include "opt_uvmhist.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/sched.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/proc.h>

#include <uvm/uvm.h>

/*
 * global vars... XXXCDC: move to uvm. structure.
 */

/*
 * physical memory config is stored in vm_physmem.
 */

struct vm_physseg vm_physmem[VM_PHYSSEG_MAX];	/* XXXCDC: uvm.physmem */
int vm_nphysseg = 0;				/* XXXCDC: uvm.nphysseg */

/*
 * Some supported CPUs in a given architecture don't support all
 * of the things necessary to do idle page zero'ing efficiently.
 * We therefore provide a way to disable it from machdep code here.
 */
/*
 * XXX disabled until we can find a way to do this without causing
 * problems for either CPU caches or DMA latency.
 */
boolean_t vm_page_zero_enable = FALSE;

/*
 * local variables
 */

/*
 * these variables record the values returned by vm_page_bootstrap,
 * for debugging purposes.  The implementation of uvm_pageboot_alloc
 * and pmap_startup here also uses them internally.
 */

static vaddr_t      virtual_space_start;
static vaddr_t      virtual_space_end;

/*
 * we use a hash table with only one bucket during bootup.  we will
 * later rehash (resize) the hash table once the allocator is ready.
 * we statically allocate the one bootstrap bucket below...
 */

static struct pglist uvm_bootbucket;

/*
 * we allocate an initial number of page colors in uvm_page_init(),
 * and remember them.  We may re-color pages as cache sizes are
 * discovered during the autoconfiguration phase.  But we can never
 * free the initial set of buckets, since they are allocated using
 * uvm_pageboot_alloc().
 */

static boolean_t have_recolored_pages /* = FALSE */;

MALLOC_DEFINE(M_VMPAGE, "VM page", "VM page");

#ifdef DEBUG
vaddr_t uvm_zerocheckkva;
#endif /* DEBUG */

/*
 * local prototypes
 */

static void uvm_pageinsert(struct vm_page *);
static void uvm_pageinsert_after(struct vm_page *, struct vm_page *);
static void uvm_pageremove(struct vm_page *);

/*
 * inline functions
 */
/*
 * uvm_pageinsert: insert a page in the object and the hash table
 * uvm_pageinsert_after: insert a page into the specified place in listq
 *
 * => caller must lock object
 * => caller must lock page queues
 * => caller should have already set pg's object and offset pointers
 *    and bumped the version counter
 */

inline static void
uvm_pageinsert_after(struct vm_page *pg, struct vm_page *where)
{
	struct pglist *buck;
	struct uvm_object *uobj = pg->uobject;

	KASSERT((pg->flags & PG_TABLED) == 0);
	KASSERT(where == NULL || (where->flags & PG_TABLED));
	KASSERT(where == NULL || (where->uobject == uobj));
	buck = &uvm.page_hash[uvm_pagehash(uobj, pg->offset)];
	simple_lock(&uvm.hashlock);
	TAILQ_INSERT_TAIL(buck, pg, hashq);
	simple_unlock(&uvm.hashlock);

	if (UVM_OBJ_IS_VNODE(uobj)) {
		if (uobj->uo_npages == 0) {
			struct vnode *vp = (struct vnode *)uobj;

			vholdl(vp);
		}
		if (UVM_OBJ_IS_VTEXT(uobj)) {
			uvmexp.execpages++;
		} else {
			uvmexp.filepages++;
		}
	} else if (UVM_OBJ_IS_AOBJ(uobj)) {
		uvmexp.anonpages++;
	}

	if (where)
		TAILQ_INSERT_AFTER(&uobj->memq, where, pg, listq);
	else
		TAILQ_INSERT_TAIL(&uobj->memq, pg, listq);
	pg->flags |= PG_TABLED;
	uobj->uo_npages++;
}

inline static void
uvm_pageinsert(struct vm_page *pg)
{

	uvm_pageinsert_after(pg, NULL);
}
/*
 * uvm_pageremove: remove page from object and hash
 *
 * => caller must lock object
 * => caller must lock page queues
 */

static inline void
uvm_pageremove(struct vm_page *pg)
{
	struct pglist *buck;
	struct uvm_object *uobj = pg->uobject;

	KASSERT(pg->flags & PG_TABLED);
	buck = &uvm.page_hash[uvm_pagehash(uobj, pg->offset)];
	simple_lock(&uvm.hashlock);
	TAILQ_REMOVE(buck, pg, hashq);
	simple_unlock(&uvm.hashlock);

	if (UVM_OBJ_IS_VNODE(uobj)) {
		if (uobj->uo_npages == 1) {
			struct vnode *vp = (struct vnode *)uobj;

			holdrelel(vp);
		}
		if (UVM_OBJ_IS_VTEXT(uobj)) {
			uvmexp.execpages--;
		} else {
			uvmexp.filepages--;
		}
	} else if (UVM_OBJ_IS_AOBJ(uobj)) {
		uvmexp.anonpages--;
	}

	/* object should be locked */
	uobj->uo_npages--;
	TAILQ_REMOVE(&uobj->memq, pg, listq);
	pg->flags &= ~PG_TABLED;
	pg->uobject = NULL;
}

static void
uvm_page_init_buckets(struct pgfreelist *pgfl)
{
	int color, i;

	for (color = 0; color < uvmexp.ncolors; color++) {
		for (i = 0; i < PGFL_NQUEUES; i++) {
			TAILQ_INIT(&pgfl->pgfl_buckets[color].pgfl_queues[i]);
		}
	}
}

/*
 * uvm_page_init: init the page system.   called from uvm_init().
 *
 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
 */

void
uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
{
	vsize_t freepages, pagecount, bucketcount, n;
	struct pgflbucket *bucketarray;
	struct vm_page *pagearray;
	int lcv;
	u_int i;
	paddr_t paddr;

	/*
	 * init the page queues and page queue locks, except the free
	 * list; we allocate that later (with the initial vm_page
	 * structures).
	 */

	TAILQ_INIT(&uvm.page_active);
	TAILQ_INIT(&uvm.page_inactive);
	simple_lock_init(&uvm.pageqlock);
	simple_lock_init(&uvm.fpageqlock);

	/*
	 * init the <obj,offset> => <page> hash table.  for now
	 * we just have one bucket (the bootstrap bucket).  later on we
	 * will allocate new buckets as we dynamically resize the hash table.
	 */

	uvm.page_nhash = 1;			/* 1 bucket */
	uvm.page_hashmask = 0;			/* mask for hash function */
	uvm.page_hash = &uvm_bootbucket;	/* install bootstrap bucket */
	TAILQ_INIT(uvm.page_hash);		/* init hash table */
	simple_lock_init(&uvm.hashlock);	/* init hash table lock */

	/*
	 * allocate vm_page structures.
	 */

	/*
	 * sanity check:
	 * before calling this function the MD code is expected to register
	 * some free RAM with the uvm_page_physload() function.   our job
	 * now is to allocate vm_page structures for this memory.
	 */

	if (vm_nphysseg == 0)
		panic("uvm_page_bootstrap: no memory pre-allocated");

	/*
	 * first calculate the number of free pages...
	 *
	 * note that we use start/end rather than avail_start/avail_end.
	 * this allows us to allocate extra vm_page structures in case we
	 * want to return some memory to the pool after booting.
	 */

	freepages = 0;
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
		freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);

	/*
	 * Let MD code initialize the number of colors, or default
	 * to 1 color if MD code doesn't care.
	 */
	if (uvmexp.ncolors == 0)
		uvmexp.ncolors = 1;
	uvmexp.colormask = uvmexp.ncolors - 1;

	/*
	 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
	 * use.   for each page of memory we use we need a vm_page structure.
	 * thus, the total number of pages we can use is the total size of
	 * the memory divided by the PAGE_SIZE plus the size of the vm_page
	 * structure.   we add one to freepages as a fudge factor to avoid
	 * truncation errors (since we can only allocate in terms of whole
	 * pages).
	 */

	bucketcount = uvmexp.ncolors * VM_NFREELIST;
	pagecount = ((freepages + 1) << PAGE_SHIFT) /
	    (PAGE_SIZE + sizeof(struct vm_page));

	bucketarray = (void *)uvm_pageboot_alloc((bucketcount *
	    sizeof(struct pgflbucket)) + (pagecount *
	    sizeof(struct vm_page)));
	pagearray = (struct vm_page *)(bucketarray + bucketcount);

	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
		uvm.page_free[lcv].pgfl_buckets =
		    (bucketarray + (lcv * uvmexp.ncolors));
		uvm_page_init_buckets(&uvm.page_free[lcv]);
	}
	memset(pagearray, 0, pagecount * sizeof(struct vm_page));

	/*
	 * init the vm_page structures and put them in the correct place.
	 */

	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
		n = vm_physmem[lcv].end - vm_physmem[lcv].start;

		/* set up page array pointers */
		vm_physmem[lcv].pgs = pagearray;
		pagearray += n;
		pagecount -= n;
		vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);

		/* init and free vm_pages (we've already zeroed them) */
		paddr = ptoa(vm_physmem[lcv].start);
		for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
			vm_physmem[lcv].pgs[i].phys_addr = paddr;
#ifdef __HAVE_VM_PAGE_MD
			VM_MDPAGE_INIT(&vm_physmem[lcv].pgs[i]);
#endif
			if (atop(paddr) >= vm_physmem[lcv].avail_start &&
			    atop(paddr) <= vm_physmem[lcv].avail_end) {
				uvmexp.npages++;
				/* add page to free pool */
				uvm_pagefree(&vm_physmem[lcv].pgs[i]);
			}
		}
	}

	/*
	 * pass up the values of virtual_space_start and
	 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
	 * layers of the VM.
	 */

	*kvm_startp = round_page(virtual_space_start);
	*kvm_endp = trunc_page(virtual_space_end);
#ifdef DEBUG
	/*
	 * steal kva for uvm_pagezerocheck().
	 */
	uvm_zerocheckkva = *kvm_startp;
	*kvm_startp += PAGE_SIZE;
#endif /* DEBUG */

	/*
	 * init locks for kernel threads
	 */

	simple_lock_init(&uvm.pagedaemon_lock);
	simple_lock_init(&uvm.aiodoned_lock);

	/*
	 * init various thresholds.
	 */

	uvmexp.reserve_pagedaemon = 1;
	uvmexp.reserve_kernel = 5;
	uvmexp.anonminpct = 10;
	uvmexp.fileminpct = 10;
	uvmexp.execminpct = 5;
	uvmexp.anonmaxpct = 80;
	uvmexp.filemaxpct = 50;
	uvmexp.execmaxpct = 30;
	uvmexp.anonmin = uvmexp.anonminpct * 256 / 100;
	uvmexp.filemin = uvmexp.fileminpct * 256 / 100;
	uvmexp.execmin = uvmexp.execminpct * 256 / 100;
	uvmexp.anonmax = uvmexp.anonmaxpct * 256 / 100;
	uvmexp.filemax = uvmexp.filemaxpct * 256 / 100;
	uvmexp.execmax = uvmexp.execmaxpct * 256 / 100;
	uvm_pctparam_set(&uvmexp.inactivepct, 33);

	/*
	 * determine if we should zero pages in the idle loop.
	 */

	uvm.page_idle_zero = vm_page_zero_enable;

	/*
	 * done!
	 */

	uvm.page_init_done = TRUE;
}

/*
 * uvm_setpagesize: set the page size
 *
 * => sets page_shift and page_mask from uvmexp.pagesize.
 */

void
uvm_setpagesize(void)
{

	/*
	 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE
	 * to be a constant (indicated by being a non-zero value).
	 */
	if (uvmexp.pagesize == 0) {
		if (PAGE_SIZE == 0)
			panic("uvm_setpagesize: uvmexp.pagesize not set");
		uvmexp.pagesize = PAGE_SIZE;
	}
	uvmexp.pagemask = uvmexp.pagesize - 1;
	if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
		panic("uvm_setpagesize: page size not a power of two");
	for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
		if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
			break;
}
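
/*
 * Worked example (editorial note, not from the original source): with
 * uvmexp.pagesize set to 4096, the code above yields
 *
 *	uvmexp.pagemask  = 4096 - 1 = 0xfff
 *	uvmexp.pageshift = 12		(since 1 << 12 == 4096)
 *
 * and a page size that is not a power of two (e.g. 4095) hits the
 * "page size not a power of two" panic, because 4094 & 4095 != 0.
 */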

/*
 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
 */

vaddr_t
uvm_pageboot_alloc(vsize_t size)
{
	static boolean_t initialized = FALSE;
	vaddr_t addr;
#if !defined(PMAP_STEAL_MEMORY)
	vaddr_t vaddr;
	paddr_t paddr;
#endif

	/*
	 * on first call to this function, initialize ourselves.
	 */
	if (initialized == FALSE) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/* round it the way we like it */
		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);

		initialized = TRUE;
	}

	/* round to page size */
	size = round_page(size);

#if defined(PMAP_STEAL_MEMORY)

	/*
	 * defer bootstrap allocation to MD code (it may want to allocate
	 * from a direct-mapped segment).  pmap_steal_memory should adjust
	 * virtual_space_start/virtual_space_end if necessary.
	 */

	addr = pmap_steal_memory(size, &virtual_space_start,
	    &virtual_space_end);

	return(addr);

#else /* !PMAP_STEAL_MEMORY */

	/*
	 * allocate virtual memory for this request
	 */
	if (virtual_space_start == virtual_space_end ||
	    (virtual_space_end - virtual_space_start) < size)
		panic("uvm_pageboot_alloc: out of virtual space");

	addr = virtual_space_start;

#ifdef PMAP_GROWKERNEL
	/*
	 * If the kernel pmap can't map the requested space,
	 * then allocate more resources for it.
	 */
	if (uvm_maxkaddr < (addr + size)) {
		uvm_maxkaddr = pmap_growkernel(addr + size);
		if (uvm_maxkaddr < (addr + size))
			panic("uvm_pageboot_alloc: pmap_growkernel() failed");
	}
#endif

	virtual_space_start += size;

	/*
	 * allocate and mapin physical pages to back new virtual pages
	 */

	for (vaddr = round_page(addr) ; vaddr < addr + size ;
	    vaddr += PAGE_SIZE) {

		if (!uvm_page_physget(&paddr))
			panic("uvm_pageboot_alloc: out of memory");

		/*
		 * Note this memory is no longer managed, so using
		 * pmap_kenter is safe.
		 */
		pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
	}
	pmap_update(pmap_kernel());
	return(addr);
#endif	/* PMAP_STEAL_MEMORY */
}
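
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * a typical consumer of uvm_pageboot_alloc() is machine-dependent bootstrap
 * code that needs wired, mapped memory before uvm_page_init() has run.
 * The names mdpage_array/mdpage_bootstrap below are hypothetical.
 *
 *	static struct mdpage *mdpage_array;
 *
 *	void
 *	mdpage_bootstrap(size_t count)
 *	{
 *
 *		mdpage_array = (struct mdpage *)
 *		    uvm_pageboot_alloc(count * sizeof(*mdpage_array));
 *		memset(mdpage_array, 0, count * sizeof(*mdpage_array));
 *	}
 *
 * uvm_pageboot_alloc() either returns page-aligned mapped memory or panics;
 * there is no failure return to check.
 */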

#if !defined(PMAP_STEAL_MEMORY)
/*
 * uvm_page_physget: "steal" one page from the vm_physmem structure.
 *
 * => attempt to allocate it off the end of a segment in which the "avail"
 *    values match the start/end values.   if we can't do that, then we
 *    will advance both values (making them equal, and removing some
 *    vm_page structures from the non-avail area).
 * => return false if out of memory.
 */

/* subroutine: try to allocate from memory chunks on the specified freelist */
static boolean_t uvm_page_physget_freelist(paddr_t *, int);

static boolean_t
uvm_page_physget_freelist(paddr_t *paddrp, int freelist)
{
	int lcv, x;

	/* pass 1: try allocating from a matching end */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
#else
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
#endif
	{

		if (uvm.page_init_done == TRUE)
			panic("uvm_page_physget: called _after_ bootstrap");

		if (vm_physmem[lcv].free_list != freelist)
			continue;

		/* try from front */
		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
			*paddrp = ptoa(vm_physmem[lcv].avail_start);
			vm_physmem[lcv].avail_start++;
			vm_physmem[lcv].start++;
			/* nothing left?   nuke it */
			if (vm_physmem[lcv].avail_start ==
			    vm_physmem[lcv].end) {
				if (vm_nphysseg == 1)
				    panic("uvm_page_physget: out of memory!");
				vm_nphysseg--;
				for (x = lcv ; x < vm_nphysseg ; x++)
					/* structure copy */
					vm_physmem[x] = vm_physmem[x+1];
			}
			return (TRUE);
		}

		/* try from rear */
		if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
			*paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
			vm_physmem[lcv].avail_end--;
			vm_physmem[lcv].end--;
			/* nothing left?   nuke it */
			if (vm_physmem[lcv].avail_end ==
			    vm_physmem[lcv].start) {
				if (vm_nphysseg == 1)
				    panic("uvm_page_physget: out of memory!");
				vm_nphysseg--;
				for (x = lcv ; x < vm_nphysseg ; x++)
					/* structure copy */
					vm_physmem[x] = vm_physmem[x+1];
			}
			return (TRUE);
		}
	}

	/* pass2: forget about matching ends, just allocate something */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
#else
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
#endif
	{

		/* any room in this bank? */
		if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
			continue;  /* nope */

		*paddrp = ptoa(vm_physmem[lcv].avail_start);
		vm_physmem[lcv].avail_start++;
		/* truncate! */
		vm_physmem[lcv].start = vm_physmem[lcv].avail_start;

		/* nothing left?   nuke it */
		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
			if (vm_nphysseg == 1)
				panic("uvm_page_physget: out of memory!");
			vm_nphysseg--;
			for (x = lcv ; x < vm_nphysseg ; x++)
				/* structure copy */
				vm_physmem[x] = vm_physmem[x+1];
		}
		return (TRUE);
	}

	return (FALSE);        /* whoops! */
}

boolean_t
uvm_page_physget(paddr_t *paddrp)
{
	int i;

	/* try in the order of freelist preference */
	for (i = 0; i < VM_NFREELIST; i++)
		if (uvm_page_physget_freelist(paddrp, i) == TRUE)
			return (TRUE);
	return (FALSE);
}
#endif /* PMAP_STEAL_MEMORY */

/*
 * uvm_page_physload: load physical memory into VM system
 *
 * => all args are PFs
 * => all pages in start/end get vm_page structures
 * => areas marked by avail_start/avail_end get added to the free page pool
 * => we are limited to VM_PHYSSEG_MAX physical memory segments
 */

void
uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
    paddr_t avail_end, int free_list)
{
	int preload, lcv;
	psize_t npages;
	struct vm_page *pgs;
	struct vm_physseg *ps;

	if (uvmexp.pagesize == 0)
		panic("uvm_page_physload: page size not set!");
	if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
		panic("uvm_page_physload: bad free list %d", free_list);
	if (start >= end)
		panic("uvm_page_physload: start >= end");

	/*
	 * do we have room?
	 */

	if (vm_nphysseg == VM_PHYSSEG_MAX) {
		printf("uvm_page_physload: unable to load physical memory "
		    "segment\n");
		printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
		    VM_PHYSSEG_MAX, (long long)start, (long long)end);
		printf("\tincrease VM_PHYSSEG_MAX\n");
		return;
	}

	/*
	 * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
	 * called yet, so malloc is not available).
	 */

	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
		if (vm_physmem[lcv].pgs)
			break;
	}
	preload = (lcv == vm_nphysseg);

	/*
	 * if VM is already running, attempt to malloc() vm_page structures
	 */

	if (!preload) {
#if defined(VM_PHYSSEG_NOADD)
		panic("uvm_page_physload: tried to add RAM after vm_mem_init");
#else
		/* XXXCDC: need some sort of lockout for this case */
		paddr_t paddr;
		npages = end - start;  /* # of pages */
		pgs = malloc(sizeof(struct vm_page) * npages,
		    M_VMPAGE, M_NOWAIT);
		if (pgs == NULL) {
			printf("uvm_page_physload: can not malloc vm_page "
			    "structs for segment\n");
			printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
			return;
		}
		/* zero data, init phys_addr and free_list, and free pages */
		memset(pgs, 0, sizeof(struct vm_page) * npages);
		for (lcv = 0, paddr = ptoa(start) ;
		    lcv < npages ; lcv++, paddr += PAGE_SIZE) {
			pgs[lcv].phys_addr = paddr;
			pgs[lcv].free_list = free_list;
			if (atop(paddr) >= avail_start &&
			    atop(paddr) <= avail_end)
				uvm_pagefree(&pgs[lcv]);
		}
		/* XXXCDC: incomplete: need to update uvmexp.free, what else? */
		/* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
#endif
	} else {
		pgs = NULL;
		npages = 0;
	}

	/*
	 * now insert us in the proper place in vm_physmem[]
	 */

#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
	/* random: put it at the end (easy!) */
	ps = &vm_physmem[vm_nphysseg];
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
	{
		int x;
		/* sort by address for binary search */
		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
			if (start < vm_physmem[lcv].start)
				break;
		ps = &vm_physmem[lcv];
		/* move back other entries, if necessary ... */
		for (x = vm_nphysseg ; x > lcv ; x--)
			/* structure copy */
			vm_physmem[x] = vm_physmem[x - 1];
	}
#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
	{
		int x;
		/* sort by largest segment first */
		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
			if ((end - start) >
			    (vm_physmem[lcv].end - vm_physmem[lcv].start))
				break;
		ps = &vm_physmem[lcv];
		/* move back other entries, if necessary ... */
		for (x = vm_nphysseg ; x > lcv ; x--)
			/* structure copy */
			vm_physmem[x] = vm_physmem[x - 1];
	}
#else
	panic("uvm_page_physload: unknown physseg strategy selected!");
#endif

	ps->start = start;
	ps->end = end;
	ps->avail_start = avail_start;
	ps->avail_end = avail_end;
	if (preload) {
		ps->pgs = NULL;
	} else {
		ps->pgs = pgs;
		ps->lastpg = pgs + npages - 1;
	}
	ps->free_list = free_list;
	vm_nphysseg++;

	if (!preload)
		uvm_page_rehash();
}
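
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * MD startup code normally registers RAM with uvm_page_physload() before
 * calling uvm_init().  All arguments are page frame numbers, so byte
 * addresses are converted with atop().  The symbols memstart, memend,
 * availstart and availend below are hypothetical.
 *
 *	uvm_page_physload(atop(memstart), atop(memend),
 *	    atop(availstart), atop(availend), VM_FREELIST_DEFAULT);
 *
 * Pages in [start, end) get vm_page structures; only the subrange
 * [avail_start, avail_end) is added to the free page pool.
 */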

/*
 * uvm_page_rehash: reallocate hash table based on number of free pages.
 */

void
uvm_page_rehash(void)
{
	int freepages, lcv, bucketcount, oldcount;
	struct pglist *newbuckets, *oldbuckets;
	struct vm_page *pg;
	size_t newsize, oldsize;

	/*
	 * compute number of pages that can go in the free pool
	 */

	freepages = 0;
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
		freepages +=
		    (vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);

	/*
	 * compute number of buckets needed for this number of pages
	 */

	bucketcount = 1;
	while (bucketcount < freepages)
		bucketcount = bucketcount * 2;

	/*
	 * compute the size of the current table and new table.
	 */

	oldbuckets = uvm.page_hash;
	oldcount = uvm.page_nhash;
	oldsize = round_page(sizeof(struct pglist) * oldcount);
	newsize = round_page(sizeof(struct pglist) * bucketcount);

	/*
	 * allocate the new buckets
	 */

	newbuckets = (struct pglist *) uvm_km_alloc(kernel_map, newsize,
	    0, UVM_KMF_WIRED);
	if (newbuckets == NULL) {
		printf("uvm_page_physrehash: WARNING: could not grow page "
		    "hash table\n");
		return;
	}
	for (lcv = 0 ; lcv < bucketcount ; lcv++)
		TAILQ_INIT(&newbuckets[lcv]);

	/*
	 * now replace the old buckets with the new ones and rehash everything
	 */

	simple_lock(&uvm.hashlock);
	uvm.page_hash = newbuckets;
	uvm.page_nhash = bucketcount;
	uvm.page_hashmask = bucketcount - 1;  /* power of 2 */

	/* ... and rehash */
	for (lcv = 0 ; lcv < oldcount ; lcv++) {
		while ((pg = oldbuckets[lcv].tqh_first) != NULL) {
			TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq);
			TAILQ_INSERT_TAIL(
			  &uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)],
			  pg, hashq);
		}
	}
	simple_unlock(&uvm.hashlock);

	/*
	 * free old bucket array if is not the boot-time table
	 */

	if (oldbuckets != &uvm_bootbucket)
		uvm_km_free(kernel_map, (vaddr_t) oldbuckets, oldsize,
		    UVM_KMF_WIRED);
}
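
/*
 * Worked example (editorial note, not from the original source): with
 * roughly 49000 available pages, the doubling loop above grows bucketcount
 * from 1 to 65536, the smallest power of two >= 49000, so the new table has
 * 65536 buckets and page_hashmask becomes 0xffff.
 */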

/*
 * uvm_page_recolor: Recolor the pages if the new bucket count is
 * larger than the old one.
 */

void
uvm_page_recolor(int newncolors)
{
	struct pgflbucket *bucketarray, *oldbucketarray;
	struct pgfreelist pgfl;
	struct vm_page *pg;
	vsize_t bucketcount;
	int s, lcv, color, i, ocolors;

	if (newncolors <= uvmexp.ncolors)
		return;

	if (uvm.page_init_done == FALSE) {
		uvmexp.ncolors = newncolors;
		return;
	}

	bucketcount = newncolors * VM_NFREELIST;
	bucketarray = malloc(bucketcount * sizeof(struct pgflbucket),
	    M_VMPAGE, M_NOWAIT);
	if (bucketarray == NULL) {
		printf("WARNING: unable to allocate %ld page color buckets\n",
		    (long) bucketcount);
		return;
	}

	s = uvm_lock_fpageq();

	/* Make sure we should still do this. */
	if (newncolors <= uvmexp.ncolors) {
		uvm_unlock_fpageq(s);
		free(bucketarray, M_VMPAGE);
		return;
	}

	oldbucketarray = uvm.page_free[0].pgfl_buckets;
	ocolors = uvmexp.ncolors;

	uvmexp.ncolors = newncolors;
	uvmexp.colormask = uvmexp.ncolors - 1;

	for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
		pgfl.pgfl_buckets = (bucketarray + (lcv * newncolors));
		uvm_page_init_buckets(&pgfl);
		for (color = 0; color < ocolors; color++) {
			for (i = 0; i < PGFL_NQUEUES; i++) {
				while ((pg = TAILQ_FIRST(&uvm.page_free[
				    lcv].pgfl_buckets[color].pgfl_queues[i]))
				    != NULL) {
					TAILQ_REMOVE(&uvm.page_free[
					    lcv].pgfl_buckets[
					    color].pgfl_queues[i], pg, pageq);
					TAILQ_INSERT_TAIL(&pgfl.pgfl_buckets[
					    VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
					    i], pg, pageq);
				}
			}
		}
		uvm.page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
	}

	if (have_recolored_pages) {
		uvm_unlock_fpageq(s);
		free(oldbucketarray, M_VMPAGE);
		return;
	}

	have_recolored_pages = TRUE;
	uvm_unlock_fpageq(s);
}
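
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * cache-probing code in MD autoconfiguration can call uvm_page_recolor()
 * once it knows the size and associativity of the largest physically-indexed
 * cache.  The variable names and the exact color formula below are assumed
 * for illustration only.
 *
 *	size_t cache_size = 1024 * 1024;	(a 1MB L2, say)
 *	int cache_ways = 4;
 *
 *	uvm_page_recolor(cache_size / cache_ways / PAGE_SIZE);
 *
 * Requests that do not increase the color count are ignored, so it is safe
 * to call this for every cache that is discovered.
 */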

/*
 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat
 */

static inline struct vm_page *
uvm_pagealloc_pgfl(struct pgfreelist *pgfl, int try1, int try2,
    int *trycolorp)
{
	struct pglist *freeq;
	struct vm_page *pg;
	int color, trycolor = *trycolorp;

	color = trycolor;
	do {
		if ((pg = TAILQ_FIRST((freeq =
		    &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL)
			goto gotit;
		if ((pg = TAILQ_FIRST((freeq =
		    &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL)
			goto gotit;
		color = (color + 1) & uvmexp.colormask;
	} while (color != trycolor);

	return (NULL);

 gotit:
	TAILQ_REMOVE(freeq, pg, pageq);
	uvmexp.free--;

	/* update zero'd page count */
	if (pg->flags & PG_ZERO)
		uvmexp.zeropages--;

	if (color == trycolor)
		uvmexp.colorhit++;
	else {
		uvmexp.colormiss++;
		*trycolorp = color;
	}

	return (pg);
}

/*
 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
 *
 * => return null if no pages free
 * => wake up pagedaemon if number of free pages drops below low water mark
 * => if obj != NULL, obj must be locked (to put in hash)
 * => if anon != NULL, anon must be locked (to put in anon)
 * => only one of obj or anon can be non-null
 * => caller must activate/deactivate page if it is not wired.
 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
 * => policy decision: it is more important to pull a page off of the
 *	appropriate priority free list than it is to get a zero'd or
 *	unknown contents page.  This is because we live with the
 *	consequences of a bad free list decision for the entire
 *	lifetime of the page, e.g. if the page comes from memory that
 *	is slower to access.
 */

struct vm_page *
uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
    int flags, int strat, int free_list)
{
	int lcv, try1, try2, s, zeroit = 0, color;
	struct vm_page *pg;
	boolean_t use_reserve;

	KASSERT(obj == NULL || anon == NULL);
	KASSERT(off == trunc_page(off));
	LOCK_ASSERT(obj == NULL || simple_lock_held(&obj->vmobjlock));
	LOCK_ASSERT(anon == NULL || simple_lock_held(&anon->an_lock));

	s = uvm_lock_fpageq();

	/*
	 * This implements a global round-robin page coloring
	 * algorithm.
	 *
	 * XXXJRT: Should we make the `nextcolor' per-CPU?
	 * XXXJRT: What about virtually-indexed caches?
	 */

	color = uvm.page_free_nextcolor;

	/*
	 * check to see if we need to generate some free pages by waking
	 * the pagedaemon.
	 */

	UVM_KICK_PDAEMON();

	/*
	 * fail if any of these conditions is true:
	 * [1]  there really are no free pages, or
	 * [2]  only kernel "reserved" pages remain and
	 *        the page isn't being allocated to a kernel object.
	 * [3]  only pagedaemon "reserved" pages remain and
	 *        the requestor isn't the pagedaemon.
	 */

	use_reserve = (flags & UVM_PGA_USERESERVE) ||
	    (obj && UVM_OBJ_IS_KERN_OBJECT(obj));
	if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) ||
	    (uvmexp.free <= uvmexp.reserve_pagedaemon &&
	     !(use_reserve && curproc == uvm.pagedaemon_proc)))
		goto fail;

#if PGFL_NQUEUES != 2
#error uvm_pagealloc_strat needs to be updated
#endif

	/*
	 * If we want a zero'd page, try the ZEROS queue first, otherwise
	 * we try the UNKNOWN queue first.
	 */
	if (flags & UVM_PGA_ZERO) {
		try1 = PGFL_ZEROS;
		try2 = PGFL_UNKNOWN;
	} else {
		try1 = PGFL_UNKNOWN;
		try2 = PGFL_ZEROS;
	}

 again:
	switch (strat) {
	case UVM_PGA_STRAT_NORMAL:
		/* Check all freelists in descending priority order. */
		for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
			pg = uvm_pagealloc_pgfl(&uvm.page_free[lcv],
			    try1, try2, &color);
			if (pg != NULL)
				goto gotit;
		}

		/* No pages free! */
		goto fail;

	case UVM_PGA_STRAT_ONLY:
	case UVM_PGA_STRAT_FALLBACK:
		/* Attempt to allocate from the specified free list. */
		KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
		pg = uvm_pagealloc_pgfl(&uvm.page_free[free_list],
		    try1, try2, &color);
		if (pg != NULL)
			goto gotit;

		/* Fall back, if possible. */
		if (strat == UVM_PGA_STRAT_FALLBACK) {
			strat = UVM_PGA_STRAT_NORMAL;
			goto again;
		}

		/* No pages free! */
		goto fail;

	default:
		panic("uvm_pagealloc_strat: bad strat %d", strat);
		/* NOTREACHED */
	}

 gotit:
	/*
	 * We now know which color we actually allocated from; set
	 * the next color accordingly.
	 */

	uvm.page_free_nextcolor = (color + 1) & uvmexp.colormask;

	/*
	 * update allocation statistics and remember if we have to
	 * zero the page
	 */

	if (flags & UVM_PGA_ZERO) {
		if (pg->flags & PG_ZERO) {
			uvmexp.pga_zerohit++;
			zeroit = 0;
		} else {
			uvmexp.pga_zeromiss++;
			zeroit = 1;
		}
	}
	uvm_unlock_fpageq(s);

	pg->offset = off;
	pg->uobject = obj;
	pg->uanon = anon;
	pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
	if (anon) {
		anon->an_page = pg;
		pg->pqflags = PQ_ANON;
		uvmexp.anonpages++;
	} else {
		if (obj) {
			uvm_pageinsert(pg);
		}
		pg->pqflags = 0;
	}
#if defined(UVM_PAGE_TRKOWN)
	pg->owner_tag = NULL;
#endif
	UVM_PAGE_OWN(pg, "new alloc");

	if (flags & UVM_PGA_ZERO) {
		/*
		 * A zero'd page is not clean.  If we got a page not already
		 * zero'd, then we have to zero it ourselves.
		 */
		pg->flags &= ~PG_CLEAN;
		if (zeroit)
			pmap_zero_page(VM_PAGE_TO_PHYS(pg));
	}

	return(pg);

 fail:
	uvm_unlock_fpageq(s);
	return (NULL);
}
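
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * most callers go through the uvm_pagealloc() wrapper, which selects
 * UVM_PGA_STRAT_NORMAL.  A typical "allocate a zeroed page for an object,
 * or wait for memory" loop looks roughly like this, with uobj and off set
 * up and uobj locked by the caller; the wait channel name is arbitrary:
 *
 *	struct vm_page *pg;
 *
 *	for (;;) {
 *		pg = uvm_pagealloc(uobj, off, NULL, UVM_PGA_ZERO);
 *		if (pg != NULL)
 *			break;
 *		simple_unlock(&uobj->vmobjlock);
 *		uvm_wait("uobjpage");
 *		simple_lock(&uobj->vmobjlock);
 *	}
 *
 * The page comes back PG_BUSY and on no paging queue; the caller is
 * expected to activate it and clear PG_BUSY when done.
 */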

/*
 * uvm_pagereplace: replace a page with another
 *
 * => object must be locked
 */

void
uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg)
{

	KASSERT((oldpg->flags & PG_TABLED) != 0);
	KASSERT(oldpg->uobject != NULL);
	KASSERT((newpg->flags & PG_TABLED) == 0);
	KASSERT(newpg->uobject == NULL);
	LOCK_ASSERT(simple_lock_held(&oldpg->uobject->vmobjlock));

	newpg->uobject = oldpg->uobject;
	newpg->offset = oldpg->offset;

	uvm_pageinsert_after(newpg, oldpg);
	uvm_pageremove(oldpg);
}

/*
 * uvm_pagerealloc: reallocate a page from one object to another
 *
 * => both objects must be locked
 */

void
uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
{
	/*
	 * remove it from the old object
	 */

	if (pg->uobject) {
		uvm_pageremove(pg);
	}

	/*
	 * put it in the new object
	 */

	if (newobj) {
		pg->uobject = newobj;
		pg->offset = newoff;
		uvm_pageinsert(pg);
	}
}

#ifdef DEBUG
/*
 * check if page is zero-filled
 *
 * - called with free page queue lock held.
 */
void
uvm_pagezerocheck(struct vm_page *pg)
{
	int *p, *ep;

	KASSERT(uvm_zerocheckkva != 0);
	LOCK_ASSERT(simple_lock_held(&uvm.fpageqlock));

	/*
	 * XXX assuming pmap_kenter_pa and pmap_kremove never call
	 * uvm page allocator.
	 *
	 * it might be better to have "CPU-local temporary map" pmap interface.
	 */
	pmap_kenter_pa(uvm_zerocheckkva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ);
	p = (int *)uvm_zerocheckkva;
	ep = (int *)((char *)p + PAGE_SIZE);
	pmap_update(pmap_kernel());
	while (p < ep) {
		if (*p != 0)
			panic("PG_ZERO page isn't zero-filled");
		p++;
	}
	pmap_kremove(uvm_zerocheckkva, PAGE_SIZE);
}
#endif /* DEBUG */

/*
 * uvm_pagefree: free page
 *
 * => erase page's identity (i.e. remove from hash/object)
 * => put page on free list
 * => caller must lock owning object (either anon or uvm_object)
 * => caller must lock page queues
 * => assumes all valid mappings of pg are gone
 */

void
uvm_pagefree(struct vm_page *pg)
{
	int s;
	struct pglist *pgfl;
	boolean_t iszero;

	KASSERT((pg->flags & PG_PAGEOUT) == 0);
	LOCK_ASSERT(simple_lock_held(&uvm.pageqlock) ||
	    (pg->pqflags & (PQ_ACTIVE|PQ_INACTIVE)) == 0);
	LOCK_ASSERT(pg->uobject == NULL ||
	    simple_lock_held(&pg->uobject->vmobjlock));
	LOCK_ASSERT(pg->uobject != NULL || pg->uanon == NULL ||
	    simple_lock_held(&pg->uanon->an_lock));

#ifdef DEBUG
	if (pg->uobject == (void *)0xdeadbeef &&
	    pg->uanon == (void *)0xdeadbeef) {
		panic("uvm_pagefree: freeing free page %p", pg);
	}
#endif /* DEBUG */

	/*
	 * if the page is loaned, resolve the loan instead of freeing.
	 */

	if (pg->loan_count) {
		KASSERT(pg->wire_count == 0);

		/*
		 * if the page is owned by an anon then we just want to
		 * drop anon ownership.  the kernel will free the page when
		 * it is done with it.  if the page is owned by an object,
		 * remove it from the object and mark it dirty for the benefit
		 * of possible anon owners.
		 *
		 * regardless of previous ownership, wakeup any waiters,
		 * unbusy the page, and we're done.
		 */

		if (pg->uobject != NULL) {
			uvm_pageremove(pg);
			pg->flags &= ~PG_CLEAN;
		} else if (pg->uanon != NULL) {
			if ((pg->pqflags & PQ_ANON) == 0) {
				pg->loan_count--;
			} else {
				pg->pqflags &= ~PQ_ANON;
				uvmexp.anonpages--;
			}
			pg->uanon->an_page = NULL;
			pg->uanon = NULL;
		}
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1);
#ifdef UVM_PAGE_TRKOWN
		pg->owner_tag = NULL;
#endif
		if (pg->loan_count) {
			uvm_pagedequeue(pg);
			return;
		}
	}

	/*
	 * remove page from its object or anon.
	 */

	if (pg->uobject != NULL) {
		uvm_pageremove(pg);
	} else if (pg->uanon != NULL) {
		pg->uanon->an_page = NULL;
		uvmexp.anonpages--;
	}

	/*
	 * now remove the page from the queues.
	 */

	uvm_pagedequeue(pg);

	/*
	 * if the page was wired, unwire it now.
	 */

	if (pg->wire_count) {
		pg->wire_count = 0;
		uvmexp.wired--;
	}

	/*
	 * and put on free queue
	 */

	iszero = (pg->flags & PG_ZERO);
	pgfl = &uvm.page_free[uvm_page_lookup_freelist(pg)].
	    pgfl_buckets[VM_PGCOLOR_BUCKET(pg)].
	    pgfl_queues[iszero ? PGFL_ZEROS : PGFL_UNKNOWN];

	pg->pqflags = PQ_FREE;
#ifdef DEBUG
	pg->uobject = (void *)0xdeadbeef;
	pg->offset = 0xdeadbeef;
	pg->uanon = (void *)0xdeadbeef;
#endif

	s = uvm_lock_fpageq();

#ifdef DEBUG
	if (iszero)
		uvm_pagezerocheck(pg);
#endif /* DEBUG */

	TAILQ_INSERT_HEAD(pgfl, pg, pageq);
	uvmexp.free++;
	if (iszero)
		uvmexp.zeropages++;

	if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
		uvm.page_idle_zero = vm_page_zero_enable;

	uvm_unlock_fpageq(s);
}

/*
 * uvm_page_unbusy: unbusy an array of pages.
 *
 * => pages must either all belong to the same object, or all belong to anons.
 * => if pages are object-owned, object must be locked.
 * => if pages are anon-owned, anons must be locked.
 * => caller must lock page queues if pages may be released.
 * => caller must make sure that anon-owned pages are not PG_RELEASED.
 */

void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
	struct vm_page *pg;
	int i;
	UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);

	for (i = 0; i < npgs; i++) {
		pg = pgs[i];
		if (pg == NULL || pg == PGO_DONTCARE) {
			continue;
		}

		LOCK_ASSERT(pg->uobject == NULL ||
		    simple_lock_held(&pg->uobject->vmobjlock));
		LOCK_ASSERT(pg->uobject != NULL ||
		    (pg->uanon != NULL &&
		    simple_lock_held(&pg->uanon->an_lock)));

		KASSERT(pg->flags & PG_BUSY);
		KASSERT((pg->flags & PG_PAGEOUT) == 0);
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		if (pg->flags & PG_RELEASED) {
			UVMHIST_LOG(ubchist, "releasing pg %p", pg,0,0,0);
			KASSERT(pg->uobject != NULL ||
			    (pg->uanon != NULL && pg->uanon->an_ref > 0));
			pg->flags &= ~PG_RELEASED;
			uvm_pagefree(pg);
		} else {
			UVMHIST_LOG(ubchist, "unbusying pg %p", pg,0,0,0);
			pg->flags &= ~(PG_WANTED|PG_BUSY);
			UVM_PAGE_OWN(pg, NULL);
		}
	}
}

#if defined(UVM_PAGE_TRKOWN)
/*
 * uvm_page_own: set or release page ownership
 *
 * => this is a debugging function that keeps track of who sets PG_BUSY
 *	and where they do it.   it can be used to track down problems
 *	such as a process setting "PG_BUSY" and never releasing it.
 * => page's object [if any] must be locked
 * => if "tag" is NULL then we are releasing page ownership
 */
void
uvm_page_own(struct vm_page *pg, const char *tag)
{
	KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);

	/* gain ownership? */
	if (tag) {
		if (pg->owner_tag) {
			printf("uvm_page_own: page %p already owned "
			    "by proc %d [%s]\n", pg,
			    pg->owner, pg->owner_tag);
			panic("uvm_page_own");
		}
		pg->owner = (curproc) ? curproc->p_pid :  (pid_t) -1;
		pg->owner_tag = tag;
		return;
	}

	/* drop ownership */
	if (pg->owner_tag == NULL) {
		printf("uvm_page_own: dropping ownership of an non-owned "
		    "page (%p)\n", pg);
		panic("uvm_page_own");
	}
	KASSERT((pg->pqflags & (PQ_ACTIVE|PQ_INACTIVE)) ||
	    (pg->uanon == NULL && pg->uobject == NULL) ||
	    pg->uobject == uvm.kernel_object ||
	    pg->wire_count > 0 ||
	    (pg->loan_count == 1 && pg->uanon == NULL) ||
	    pg->loan_count > 1);
	pg->owner_tag = NULL;
}
#endif

/*
 * uvm_pageidlezero: zero free pages while the system is idle.
 *
 * => try to complete one color bucket at a time, to reduce our impact
 *	on the CPU cache.
 * => we loop until we either reach the target or whichqs indicates that
 *	there is a process ready to run.
 */
void
uvm_pageidlezero(void)
{
	struct vm_page *pg;
	struct pgfreelist *pgfl;
	int free_list, s, firstbucket;
	static int nextbucket;

	KERNEL_LOCK(LK_EXCLUSIVE | LK_CANRECURSE);
	s = uvm_lock_fpageq();
	firstbucket = nextbucket;
	do {
		if (sched_whichqs != 0)
			goto quit;
		if (uvmexp.zeropages >= UVM_PAGEZERO_TARGET) {
			uvm.page_idle_zero = FALSE;
			goto quit;
		}
		for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
			pgfl = &uvm.page_free[free_list];
			while ((pg = TAILQ_FIRST(&pgfl->pgfl_buckets[
			    nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) {
				if (sched_whichqs != 0)
					goto quit;

				TAILQ_REMOVE(&pgfl->pgfl_buckets[
				    nextbucket].pgfl_queues[PGFL_UNKNOWN],
				    pg, pageq);
				uvmexp.free--;
				uvm_unlock_fpageq(s);
				KERNEL_UNLOCK();
#ifdef PMAP_PAGEIDLEZERO
				if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) {

					/*
					 * The machine-dependent code detected
					 * some reason for us to abort zeroing
					 * pages, probably because there is a
					 * process now ready to run.
					 */

					KERNEL_LOCK(
					    LK_EXCLUSIVE | LK_CANRECURSE);
					s = uvm_lock_fpageq();
					TAILQ_INSERT_HEAD(&pgfl->pgfl_buckets[
					    nextbucket].pgfl_queues[
					    PGFL_UNKNOWN], pg, pageq);
					uvmexp.free++;
					uvmexp.zeroaborts++;
					goto quit;
				}
#else
				pmap_zero_page(VM_PAGE_TO_PHYS(pg));
#endif /* PMAP_PAGEIDLEZERO */
				pg->flags |= PG_ZERO;

				KERNEL_LOCK(LK_EXCLUSIVE | LK_CANRECURSE);
				s = uvm_lock_fpageq();
				TAILQ_INSERT_HEAD(&pgfl->pgfl_buckets[
				    nextbucket].pgfl_queues[PGFL_ZEROS],
				    pg, pageq);
				uvmexp.free++;
				uvmexp.zeropages++;
			}
		}
		nextbucket = (nextbucket + 1) & uvmexp.colormask;
	} while (nextbucket != firstbucket);
 quit:
	uvm_unlock_fpageq(s);
	KERNEL_UNLOCK();
}

/*
 * uvm_lock_fpageq: lock the free page queue
 *
 * => free page queue can be accessed in interrupt context, so this
 *	blocks all interrupts that can cause memory allocation, and
 *	returns the previous interrupt level.
 */

int
uvm_lock_fpageq(void)
{
	int s;

	s = splvm();
	simple_lock(&uvm.fpageqlock);
	return (s);
}

/*
 * uvm_unlock_fpageq: unlock the free page queue
 *
 * => caller must supply interrupt level returned by uvm_lock_fpageq()
 *	so that it may be restored.
 */

void
uvm_unlock_fpageq(int s)
{

	simple_unlock(&uvm.fpageqlock);
	splx(s);
}
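
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * the free page queue is always accessed under this lock pair, with the
 * saved spl value threaded through by hand:
 *
 *	int s;
 *
 *	s = uvm_lock_fpageq();
 *	... examine or modify uvm.page_free / uvmexp.free ...
 *	uvm_unlock_fpageq(s);
 *
 * uvm_lock_fpageq() raises to splvm() before taking uvm.fpageqlock, so the
 * critical section must not sleep.
 */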

/*
 * uvm_pagelookup: look up a page
 *
 * => caller should lock object to keep someone from pulling the page
 *	out from under it
 */

struct vm_page *
uvm_pagelookup(struct uvm_object *obj, voff_t off)
{
	struct vm_page *pg;
	struct pglist *buck;

	buck = &uvm.page_hash[uvm_pagehash(obj,off)];
	simple_lock(&uvm.hashlock);
	TAILQ_FOREACH(pg, buck, hashq) {
		if (pg->uobject == obj && pg->offset == off) {
			break;
		}
	}
	simple_unlock(&uvm.hashlock);
	KASSERT(pg == NULL || obj->uo_npages != 0);
	KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
		(pg->flags & PG_BUSY) != 0);
	return(pg);
}

/*
 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
 *
 * => caller must lock page queues
 */

void
uvm_pagewire(struct vm_page *pg)
{
	UVM_LOCK_ASSERT_PAGEQ();
	if (pg->wire_count == 0) {
		uvm_pagedequeue(pg);
		uvmexp.wired++;
	}
	pg->wire_count++;
}

/*
 * uvm_pageunwire: unwire the page.
 *
 * => activate if wire count goes to zero.
 * => caller must lock page queues
 */

void
uvm_pageunwire(struct vm_page *pg)
{
	UVM_LOCK_ASSERT_PAGEQ();
	pg->wire_count--;
	if (pg->wire_count == 0) {
		uvm_pageactivate(pg);
		uvmexp.wired--;
	}
}

/*
 * uvm_pagedeactivate: deactivate page
 *
 * => caller must lock page queues
 * => caller must check to make sure page is not wired
 * => object that page belongs to must be locked (so we can adjust pg->flags)
 * => caller must clear the reference on the page before calling
 */

void
uvm_pagedeactivate(struct vm_page *pg)
{
	UVM_LOCK_ASSERT_PAGEQ();
	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&uvm.page_active, pg, pageq);
		pg->pqflags &= ~PQ_ACTIVE;
		uvmexp.active--;
	}
	if ((pg->pqflags & PQ_INACTIVE) == 0) {
		KASSERT(pg->wire_count == 0);
		TAILQ_INSERT_TAIL(&uvm.page_inactive, pg, pageq);
		pg->pqflags |= PQ_INACTIVE;
		uvmexp.inactive++;
	}
}

/*
 * uvm_pageactivate: activate page
 *
 * => caller must lock page queues
 */

void
uvm_pageactivate(struct vm_page *pg)
{
	UVM_LOCK_ASSERT_PAGEQ();
	uvm_pagedequeue(pg);
	if (pg->wire_count == 0) {
		TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
		pg->pqflags |= PQ_ACTIVE;
		uvmexp.active++;
	}
}

/*
 * uvm_pagedequeue: remove a page from any paging queue
 */

void
uvm_pagedequeue(struct vm_page *pg)
{
	if (pg->pqflags & PQ_ACTIVE) {
		UVM_LOCK_ASSERT_PAGEQ();
		TAILQ_REMOVE(&uvm.page_active, pg, pageq);
		pg->pqflags &= ~PQ_ACTIVE;
		uvmexp.active--;
	} else if (pg->pqflags & PQ_INACTIVE) {
		UVM_LOCK_ASSERT_PAGEQ();
		TAILQ_REMOVE(&uvm.page_inactive, pg, pageq);
		pg->pqflags &= ~PQ_INACTIVE;
		uvmexp.inactive--;
	}
}

/*
 * uvm_pagezero: zero fill a page
 *
 * => if page is part of an object then the object should be locked
 *	to protect pg->flags.
 */

void
uvm_pagezero(struct vm_page *pg)
{
	pg->flags &= ~PG_CLEAN;
	pmap_zero_page(VM_PAGE_TO_PHYS(pg));
}

/*
 * uvm_pagecopy: copy a page
 *
 * => if page is part of an object then the object should be locked
 *	to protect pg->flags.
 */

void
uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
{

	dst->flags &= ~PG_CLEAN;
	pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
}

/*
 * uvm_page_lookup_freelist: look up the free list for the specified page
 */

int
uvm_page_lookup_freelist(struct vm_page *pg)
{
	int lcv;

	lcv = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
	KASSERT(lcv != -1);
	return (vm_physmem[lcv].free_list);
}