1 /* $NetBSD: uvm_page.c,v 1.124 2007/10/08 14:06:15 ad Exp $ */
2
3 /*
4 * Copyright (c) 1997 Charles D. Cranor and Washington University.
5 * Copyright (c) 1991, 1993, The Regents of the University of California.
6 *
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * The Mach Operating System project at Carnegie-Mellon University.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by Charles D. Cranor,
23 * Washington University, the University of California, Berkeley and
24 * its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)vm_page.c 8.3 (Berkeley) 3/21/94
42 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
43 *
44 *
45 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
46 * All rights reserved.
47 *
48 * Permission to use, copy, modify and distribute this software and
49 * its documentation is hereby granted, provided that both the copyright
50 * notice and this permission notice appear in all copies of the
51 * software, derivative works or modified versions, and any portions
52 * thereof, and that both notices appear in supporting documentation.
53 *
54 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
55 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
56 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
57 *
58 * Carnegie Mellon requests users of this software to return to
59 *
60 * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
61 * School of Computer Science
62 * Carnegie Mellon University
63 * Pittsburgh PA 15213-3890
64 *
65 * any improvements or extensions that they make and grant Carnegie the
66 * rights to redistribute these changes.
67 */
68
69 /*
70 * uvm_page.c: page ops.
71 */
72
73 #include <sys/cdefs.h>
74 __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.124 2007/10/08 14:06:15 ad Exp $");
75
76 #include "opt_uvmhist.h"
77 #include "opt_readahead.h"
78
79 #include <sys/param.h>
80 #include <sys/systm.h>
81 #include <sys/malloc.h>
82 #include <sys/sched.h>
83 #include <sys/kernel.h>
84 #include <sys/vnode.h>
85 #include <sys/proc.h>
86
87 #include <uvm/uvm.h>
88 #include <uvm/uvm_pdpolicy.h>
89
90 /*
91 * global vars... XXXCDC: move to uvm. structure.
92 */
93
94 /*
95 * physical memory config is stored in vm_physmem.
96 */
97
98 struct vm_physseg vm_physmem[VM_PHYSSEG_MAX]; /* XXXCDC: uvm.physmem */
99 int vm_nphysseg = 0; /* XXXCDC: uvm.nphysseg */
100
101 /*
102 * Some supported CPUs in a given architecture don't support all
103 * of the things necessary to do idle page zero'ing efficiently.
104 * We therefore provide a way to disable it from machdep code here.
105 */
106 /*
107 * XXX disabled until we can find a way to do this without causing
108 * problems for either CPU caches or DMA latency.
109 */
110 bool vm_page_zero_enable = false;
111
112 /*
113 * local variables
114 */
115
116 /*
117 * these variables record the values returned by vm_page_bootstrap,
118 * for debugging purposes. The implementation of uvm_pageboot_alloc
119 * and pmap_startup here also uses them internally.
120 */
121
122 static vaddr_t virtual_space_start;
123 static vaddr_t virtual_space_end;
124
125 /*
126 * we use a hash table with only one bucket during bootup. we will
127 * later rehash (resize) the hash table once the allocator is ready.
128 * we statically allocate the one bootstrap bucket below...
129 */
130
131 static struct pglist uvm_bootbucket;
132
133 /*
134 * we allocate an initial number of page colors in uvm_page_init(),
135 * and remember them. We may re-color pages as cache sizes are
136 * discovered during the autoconfiguration phase. But we can never
137 * free the initial set of buckets, since they are allocated using
138 * uvm_pageboot_alloc().
139 */
140
141 static bool have_recolored_pages /* = false */;
142
143 MALLOC_DEFINE(M_VMPAGE, "VM page", "VM page");
144
145 #ifdef DEBUG
146 vaddr_t uvm_zerocheckkva;
147 #endif /* DEBUG */
148
149 /*
150 * locks on the hash table. allocated in 32 byte chunks to try
151 * and reduce cache traffic between CPUs.
152 */
153
154 #define UVM_HASHLOCK_CNT 32
155 #define uvm_hashlock(hash) \
156 (&uvm_hashlocks[(hash) & (UVM_HASHLOCK_CNT - 1)].lock)
157
158 static union {
159 kmutex_t lock;
160 uint8_t pad[32];
161 } uvm_hashlocks[UVM_HASHLOCK_CNT] __aligned(32);
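
/*
 * A minimal usage sketch (mirroring uvm_pageinsert_after() and
 * uvm_pagelookup() below): the page hash value selects both the bucket
 * and the spin lock protecting it, so all operations on a given
 * <obj,offset> contend on the same mutex.
 *
 *	hash = uvm_pagehash(uobj, off);
 *	buck = &uvm.page_hash[hash];
 *	lock = uvm_hashlock(hash);
 *	mutex_spin_enter(lock);
 *	... search or modify the bucket ...
 *	mutex_spin_exit(lock);
 */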
162
172 /*
173 * local prototypes
174 */
175
176 static void uvm_pageinsert(struct vm_page *);
177 static void uvm_pageinsert_after(struct vm_page *, struct vm_page *);
178 static void uvm_pageremove(struct vm_page *);
179
180 /*
181 * inline functions
182 */
183
184 /*
185 * uvm_pageinsert: insert a page in the object and the hash table
186 * uvm_pageinsert_after: insert a page into the specified place in listq
187 *
188 * => caller must lock object
189 * => caller must lock page queues
190 * => caller should have already set pg's object and offset pointers
191 * and bumped the version counter
192 */
193
194 inline static void
195 uvm_pageinsert_after(struct vm_page *pg, struct vm_page *where)
196 {
197 struct pglist *buck;
198 struct uvm_object *uobj = pg->uobject;
199 kmutex_t *lock;
200 u_int hash;
201
202 LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock));
203 KASSERT((pg->flags & PG_TABLED) == 0);
204 KASSERT(where == NULL || (where->flags & PG_TABLED));
205 KASSERT(where == NULL || (where->uobject == uobj));
206
207 hash = uvm_pagehash(uobj, pg->offset);
208 buck = &uvm.page_hash[hash];
209 lock = uvm_hashlock(hash);
210 mutex_spin_enter(lock);
211 TAILQ_INSERT_TAIL(buck, pg, hashq);
212 mutex_spin_exit(lock);
213
214 if (UVM_OBJ_IS_VNODE(uobj)) {
215 if (uobj->uo_npages == 0) {
216 struct vnode *vp = (struct vnode *)uobj;
217
218 vholdl(vp);
219 }
220 if (UVM_OBJ_IS_VTEXT(uobj)) {
221 uvmexp.execpages++;
222 } else {
223 uvmexp.filepages++;
224 }
225 } else if (UVM_OBJ_IS_AOBJ(uobj)) {
226 uvmexp.anonpages++;
227 }
228
229 if (where)
230 TAILQ_INSERT_AFTER(&uobj->memq, where, pg, listq);
231 else
232 TAILQ_INSERT_TAIL(&uobj->memq, pg, listq);
233 pg->flags |= PG_TABLED;
234 uobj->uo_npages++;
235 }
236
237 inline static void
238 uvm_pageinsert(struct vm_page *pg)
239 {
240
241 uvm_pageinsert_after(pg, NULL);
242 }
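
/*
 * For illustration, the usual insertion pattern used by callers in this
 * file (see uvm_pagerealloc() and uvm_pagealloc_strat() below): the
 * identity fields are set first, then the page is inserted with the
 * object locked.
 *
 *	pg->uobject = uobj;
 *	pg->offset = off;
 *	uvm_pageinsert(pg);
 */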
243
244 /*
245 * uvm_pageremove: remove page from object and hash
246 *
247 * => caller must lock object
248 * => caller must lock page queues
249 */
250
251 static inline void
252 uvm_pageremove(struct vm_page *pg)
253 {
254 struct pglist *buck;
255 struct uvm_object *uobj = pg->uobject;
256 kmutex_t *lock;
257 u_int hash;
258
259 LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock));
260 KASSERT(pg->flags & PG_TABLED);
261
262 hash = uvm_pagehash(uobj, pg->offset);
263 buck = &uvm.page_hash[hash];
264 lock = uvm_hashlock(hash);
265 mutex_spin_enter(lock);
266 TAILQ_REMOVE(buck, pg, hashq);
267 mutex_spin_exit(lock);
268
269 if (UVM_OBJ_IS_VNODE(uobj)) {
270 if (uobj->uo_npages == 1) {
271 struct vnode *vp = (struct vnode *)uobj;
272
273 holdrelel(vp);
274 }
275 if (UVM_OBJ_IS_VTEXT(uobj)) {
276 uvmexp.execpages--;
277 } else {
278 uvmexp.filepages--;
279 }
280 } else if (UVM_OBJ_IS_AOBJ(uobj)) {
281 uvmexp.anonpages--;
282 }
283
284 /* object should be locked */
285 uobj->uo_npages--;
286 TAILQ_REMOVE(&uobj->memq, pg, listq);
287 pg->flags &= ~PG_TABLED;
288 pg->uobject = NULL;
289 }
290
291 static void
292 uvm_page_init_buckets(struct pgfreelist *pgfl)
293 {
294 int color, i;
295
296 for (color = 0; color < uvmexp.ncolors; color++) {
297 for (i = 0; i < PGFL_NQUEUES; i++) {
298 TAILQ_INIT(&pgfl->pgfl_buckets[color].pgfl_queues[i]);
299 }
300 }
301 }
302
303 /*
304 * uvm_page_init: init the page system. called from uvm_init().
305 *
306 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
307 */
308
309 void
310 uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
311 {
312 vsize_t freepages, pagecount, bucketcount, n;
313 struct pgflbucket *bucketarray;
314 struct vm_page *pagearray;
315 int lcv;
316 u_int i;
317 paddr_t paddr;
318
319 /*
320 * init the page queues and page queue locks, except the free
321 * list; we allocate that later (with the initial vm_page
322 * structures).
323 */
324
325 uvmpdpol_init();
326 simple_lock_init(&uvm.pageqlock);
327 mutex_init(&uvm_fpageqlock, MUTEX_DRIVER, IPL_VM);
328
329 /*
330 * init the <obj,offset> => <page> hash table. for now
331 * we just have one bucket (the bootstrap bucket). later on we
332 * will allocate new buckets as we dynamically resize the hash table.
333 */
334
335 uvm.page_nhash = 1; /* 1 bucket */
336 uvm.page_hashmask = 0; /* mask for hash function */
337 uvm.page_hash = &uvm_bootbucket; /* install bootstrap bucket */
338 TAILQ_INIT(uvm.page_hash); /* init hash table */
339
340 /*
341 * init hashtable locks. these must be spinlocks, as they are
342 * acquired from code in the pmap modules where we cannot block.
343 * if taking multiple locks, the order is: low numbered first,
344 * high numbered second.
345 */
346
347 for (i = 0; i < UVM_HASHLOCK_CNT; i++)
348 mutex_init(&uvm_hashlocks[i].lock, MUTEX_SPIN, IPL_VM);
349
350 /*
351 * allocate vm_page structures.
352 */
353
354 /*
355 * sanity check:
356 * before calling this function the MD code is expected to register
357 * some free RAM with the uvm_page_physload() function. our job
358 * now is to allocate vm_page structures for this memory.
359 */
360
361 if (vm_nphysseg == 0)
362 panic("uvm_page_init: no memory pre-allocated");
363
364 /*
365 * first calculate the number of free pages...
366 *
367 * note that we use start/end rather than avail_start/avail_end.
368 * this allows us to allocate extra vm_page structures in case we
369 * want to return some memory to the pool after booting.
370 */
371
372 freepages = 0;
373 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
374 freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);
375
376 /*
377 * Let MD code initialize the number of colors, or default
378 * to 1 color if MD code doesn't care.
379 */
380 if (uvmexp.ncolors == 0)
381 uvmexp.ncolors = 1;
382 uvmexp.colormask = uvmexp.ncolors - 1;
383
384 /*
385 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
386 * use. for each page of memory we use we need a vm_page structure.
387 * thus, the total number of pages we can use is the total size of
388 * the memory divided by the PAGE_SIZE plus the size of the vm_page
389 * structure. we add one to freepages as a fudge factor to avoid
390 * truncation errors (since we can only allocate in terms of whole
391 * pages).
392 */
393
394 bucketcount = uvmexp.ncolors * VM_NFREELIST;
395 pagecount = ((freepages + 1) << PAGE_SHIFT) /
396 (PAGE_SIZE + sizeof(struct vm_page));
397
398 bucketarray = (void *)uvm_pageboot_alloc((bucketcount *
399 sizeof(struct pgflbucket)) + (pagecount *
400 sizeof(struct vm_page)));
401 pagearray = (struct vm_page *)(bucketarray + bucketcount);
402
403 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
404 uvm.page_free[lcv].pgfl_buckets =
405 (bucketarray + (lcv * uvmexp.ncolors));
406 uvm_page_init_buckets(&uvm.page_free[lcv]);
407 }
408 memset(pagearray, 0, pagecount * sizeof(struct vm_page));
409
410 /*
411 * init the vm_page structures and put them in the correct place.
412 */
413
414 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
415 n = vm_physmem[lcv].end - vm_physmem[lcv].start;
416
417 /* set up page array pointers */
418 vm_physmem[lcv].pgs = pagearray;
419 pagearray += n;
420 pagecount -= n;
421 vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);
422
423 /* init and free vm_pages (we've already zeroed them) */
424 paddr = ptoa(vm_physmem[lcv].start);
425 for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
426 vm_physmem[lcv].pgs[i].phys_addr = paddr;
427 #ifdef __HAVE_VM_PAGE_MD
428 VM_MDPAGE_INIT(&vm_physmem[lcv].pgs[i]);
429 #endif
430 if (atop(paddr) >= vm_physmem[lcv].avail_start &&
431 atop(paddr) <= vm_physmem[lcv].avail_end) {
432 uvmexp.npages++;
433 /* add page to free pool */
434 uvm_pagefree(&vm_physmem[lcv].pgs[i]);
435 }
436 }
437 }
438
439 /*
440 * pass up the values of virtual_space_start and
441 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
442 * layers of the VM.
443 */
444
445 *kvm_startp = round_page(virtual_space_start);
446 *kvm_endp = trunc_page(virtual_space_end);
447 #ifdef DEBUG
448 /*
449 * steal kva for uvm_pagezerocheck().
450 */
451 uvm_zerocheckkva = *kvm_startp;
452 *kvm_startp += PAGE_SIZE;
453 #endif /* DEBUG */
454
455 /*
456 * init locks for kernel threads
457 */
458
459 mutex_init(&uvm_pagedaemon_lock, MUTEX_DEFAULT, IPL_NONE);
460
461 /*
462 * init various thresholds.
463 */
464
465 uvmexp.reserve_pagedaemon = 1;
466 uvmexp.reserve_kernel = 5;
467
468 /*
469 * determine if we should zero pages in the idle loop.
470 */
471
472 uvm.page_idle_zero = vm_page_zero_enable;
473
474 /*
475 * done!
476 */
477
478 uvm.page_init_done = true;
479 }
480
481 /*
482 * uvm_setpagesize: set the page size
483 *
484 * => sets page_shift and page_mask from uvmexp.pagesize.
485 */
486
487 void
488 uvm_setpagesize(void)
489 {
490
491 /*
492 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE
493 * to be a constant (indicated by being a non-zero value).
494 */
495 if (uvmexp.pagesize == 0) {
496 if (PAGE_SIZE == 0)
497 panic("uvm_setpagesize: uvmexp.pagesize not set");
498 uvmexp.pagesize = PAGE_SIZE;
499 }
500 uvmexp.pagemask = uvmexp.pagesize - 1;
501 if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
502 panic("uvm_setpagesize: page size not a power of two");
503 for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
504 if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
505 break;
506 }
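
/*
 * Example of the resulting values for a 4KB page: uvmexp.pagesize = 4096,
 * uvmexp.pagemask = 0xfff and uvmexp.pageshift = 12, since
 * 4096 == (1 << 12) and (4096 & 0xfff) == 0 (i.e. a power of two).
 */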
507
508 /*
509 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
510 */
511
512 vaddr_t
513 uvm_pageboot_alloc(vsize_t size)
514 {
515 static bool initialized = false;
516 vaddr_t addr;
517 #if !defined(PMAP_STEAL_MEMORY)
518 vaddr_t vaddr;
519 paddr_t paddr;
520 #endif
521
522 /*
523 * on first call to this function, initialize ourselves.
524 */
525 if (initialized == false) {
526 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
527
528 /* round it the way we like it */
529 virtual_space_start = round_page(virtual_space_start);
530 virtual_space_end = trunc_page(virtual_space_end);
531
532 initialized = true;
533 }
534
535 /* round to page size */
536 size = round_page(size);
537
538 #if defined(PMAP_STEAL_MEMORY)
539
540 /*
541 * defer bootstrap allocation to MD code (it may want to allocate
542 * from a direct-mapped segment). pmap_steal_memory should adjust
543 * virtual_space_start/virtual_space_end if necessary.
544 */
545
546 addr = pmap_steal_memory(size, &virtual_space_start,
547 &virtual_space_end);
548
549 return(addr);
550
551 #else /* !PMAP_STEAL_MEMORY */
552
553 /*
554 * allocate virtual memory for this request
555 */
556 if (virtual_space_start == virtual_space_end ||
557 (virtual_space_end - virtual_space_start) < size)
558 panic("uvm_pageboot_alloc: out of virtual space");
559
560 addr = virtual_space_start;
561
562 #ifdef PMAP_GROWKERNEL
563 /*
564 * If the kernel pmap can't map the requested space,
565 * then allocate more resources for it.
566 */
567 if (uvm_maxkaddr < (addr + size)) {
568 uvm_maxkaddr = pmap_growkernel(addr + size);
569 if (uvm_maxkaddr < (addr + size))
570 panic("uvm_pageboot_alloc: pmap_growkernel() failed");
571 }
572 #endif
573
574 virtual_space_start += size;
575
576 /*
577 * allocate and mapin physical pages to back new virtual pages
578 */
579
580 for (vaddr = round_page(addr) ; vaddr < addr + size ;
581 vaddr += PAGE_SIZE) {
582
583 if (!uvm_page_physget(&paddr))
584 panic("uvm_pageboot_alloc: out of memory");
585
586 /*
587 * Note this memory is no longer managed, so using
588 * pmap_kenter is safe.
589 */
590 pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
591 }
592 pmap_update(pmap_kernel());
593 return(addr);
594 #endif /* PMAP_STEAL_MEMORY */
595 }
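
/*
 * Example use from this file (see uvm_page_init() above): stealing space
 * for the initial free list buckets and vm_page array before the VM
 * system is up.
 *
 *	bucketarray = (void *)uvm_pageboot_alloc((bucketcount *
 *	    sizeof(struct pgflbucket)) + (pagecount *
 *	    sizeof(struct vm_page)));
 */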
596
597 #if !defined(PMAP_STEAL_MEMORY)
598 /*
599 * uvm_page_physget: "steal" one page from the vm_physmem structure.
600 *
601 * => attempt to allocate it off the end of a segment in which the "avail"
602 * values match the start/end values. if we can't do that, then we
603 * will advance both values (making them equal, and removing some
604 * vm_page structures from the non-avail area).
605 * => return false if out of memory.
606 */
607
608 /* subroutine: try to allocate from memory chunks on the specified freelist */
609 static bool uvm_page_physget_freelist(paddr_t *, int);
610
611 static bool
612 uvm_page_physget_freelist(paddr_t *paddrp, int freelist)
613 {
614 int lcv, x;
615
616 /* pass 1: try allocating from a matching end */
617 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
618 for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
619 #else
620 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
621 #endif
622 {
623
624 if (uvm.page_init_done == true)
625 panic("uvm_page_physget: called _after_ bootstrap");
626
627 if (vm_physmem[lcv].free_list != freelist)
628 continue;
629
630 /* try from front */
631 if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
632 vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
633 *paddrp = ptoa(vm_physmem[lcv].avail_start);
634 vm_physmem[lcv].avail_start++;
635 vm_physmem[lcv].start++;
636 /* nothing left? nuke it */
637 if (vm_physmem[lcv].avail_start ==
638 vm_physmem[lcv].end) {
639 if (vm_nphysseg == 1)
640 panic("uvm_page_physget: out of memory!");
641 vm_nphysseg--;
642 for (x = lcv ; x < vm_nphysseg ; x++)
643 /* structure copy */
644 vm_physmem[x] = vm_physmem[x+1];
645 }
646 return (true);
647 }
648
649 /* try from rear */
650 if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
651 vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
652 *paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
653 vm_physmem[lcv].avail_end--;
654 vm_physmem[lcv].end--;
655 /* nothing left? nuke it */
656 if (vm_physmem[lcv].avail_end ==
657 vm_physmem[lcv].start) {
658 if (vm_nphysseg == 1)
659 panic("uvm_page_physget: out of memory!");
660 vm_nphysseg--;
661 for (x = lcv ; x < vm_nphysseg ; x++)
662 /* structure copy */
663 vm_physmem[x] = vm_physmem[x+1];
664 }
665 return (true);
666 }
667 }
668
669 /* pass2: forget about matching ends, just allocate something */
670 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
671 for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
672 #else
673 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
674 #endif
675 {
676
677 /* any room in this bank? */
678 if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
679 continue; /* nope */
680
681 *paddrp = ptoa(vm_physmem[lcv].avail_start);
682 vm_physmem[lcv].avail_start++;
683 /* truncate! */
684 vm_physmem[lcv].start = vm_physmem[lcv].avail_start;
685
686 /* nothing left? nuke it */
687 if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
688 if (vm_nphysseg == 1)
689 panic("uvm_page_physget: out of memory!");
690 vm_nphysseg--;
691 for (x = lcv ; x < vm_nphysseg ; x++)
692 /* structure copy */
693 vm_physmem[x] = vm_physmem[x+1];
694 }
695 return (true);
696 }
697
698 return (false); /* whoops! */
699 }
700
701 bool
702 uvm_page_physget(paddr_t *paddrp)
703 {
704 int i;
705
706 /* try in the order of freelist preference */
707 for (i = 0; i < VM_NFREELIST; i++)
708 if (uvm_page_physget_freelist(paddrp, i) == true)
709 return (true);
710 return (false);
711 }
712 #endif /* PMAP_STEAL_MEMORY */
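
/*
 * Sketch of the typical bootstrap-time use of uvm_page_physget():
 * backing bootstrap virtual memory one physical page at a time, as done
 * in uvm_pageboot_alloc() above.
 *
 *	paddr_t paddr;
 *
 *	if (!uvm_page_physget(&paddr))
 *		panic("uvm_pageboot_alloc: out of memory");
 *	pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
 */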
713
714 /*
715 * uvm_page_physload: load physical memory into VM system
716 *
717 * => all args are PFs
718 * => all pages in start/end get vm_page structures
719 * => areas marked by avail_start/avail_end get added to the free page pool
720 * => we are limited to VM_PHYSSEG_MAX physical memory segments
721 */
722
723 void
724 uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
725 paddr_t avail_end, int free_list)
726 {
727 int preload, lcv;
728 psize_t npages;
729 struct vm_page *pgs;
730 struct vm_physseg *ps;
731
732 if (uvmexp.pagesize == 0)
733 panic("uvm_page_physload: page size not set!");
734 if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
735 panic("uvm_page_physload: bad free list %d", free_list);
736 if (start >= end)
737 panic("uvm_page_physload: start >= end");
738
739 /*
740 * do we have room?
741 */
742
743 if (vm_nphysseg == VM_PHYSSEG_MAX) {
744 printf("uvm_page_physload: unable to load physical memory "
745 "segment\n");
746 printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
747 VM_PHYSSEG_MAX, (long long)start, (long long)end);
748 printf("\tincrease VM_PHYSSEG_MAX\n");
749 return;
750 }
751
752 /*
753 * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
754 * called yet, so malloc is not available).
755 */
756
757 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
758 if (vm_physmem[lcv].pgs)
759 break;
760 }
761 preload = (lcv == vm_nphysseg);
762
763 /*
764 * if VM is already running, attempt to malloc() vm_page structures
765 */
766
767 if (!preload) {
768 #if defined(VM_PHYSSEG_NOADD)
769 panic("uvm_page_physload: tried to add RAM after vm_mem_init");
770 #else
771 /* XXXCDC: need some sort of lockout for this case */
772 paddr_t paddr;
773 npages = end - start; /* # of pages */
774 pgs = malloc(sizeof(struct vm_page) * npages,
775 M_VMPAGE, M_NOWAIT);
776 if (pgs == NULL) {
777 printf("uvm_page_physload: can not malloc vm_page "
778 "structs for segment\n");
779 printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
780 return;
781 }
782 /* zero data, init phys_addr and free_list, and free pages */
783 memset(pgs, 0, sizeof(struct vm_page) * npages);
784 for (lcv = 0, paddr = ptoa(start) ;
785 lcv < npages ; lcv++, paddr += PAGE_SIZE) {
786 pgs[lcv].phys_addr = paddr;
787 pgs[lcv].free_list = free_list;
788 if (atop(paddr) >= avail_start &&
789 atop(paddr) <= avail_end)
790 uvm_pagefree(&pgs[lcv]);
791 }
792 /* XXXCDC: incomplete: need to update uvmexp.free, what else? */
793 /* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
794 #endif
795 } else {
796 pgs = NULL;
797 npages = 0;
798 }
799
800 /*
801 * now insert us in the proper place in vm_physmem[]
802 */
803
804 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
805 /* random: put it at the end (easy!) */
806 ps = &vm_physmem[vm_nphysseg];
807 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
808 {
809 int x;
810 /* sort by address for binary search */
811 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
812 if (start < vm_physmem[lcv].start)
813 break;
814 ps = &vm_physmem[lcv];
815 /* move back other entries, if necessary ... */
816 for (x = vm_nphysseg ; x > lcv ; x--)
817 /* structure copy */
818 vm_physmem[x] = vm_physmem[x - 1];
819 }
820 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
821 {
822 int x;
823 /* sort by largest segment first */
824 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
825 if ((end - start) >
826 (vm_physmem[lcv].end - vm_physmem[lcv].start))
827 break;
828 ps = &vm_physmem[lcv];
829 /* move back other entries, if necessary ... */
830 for (x = vm_nphysseg ; x > lcv ; x--)
831 /* structure copy */
832 vm_physmem[x] = vm_physmem[x - 1];
833 }
834 #else
835 panic("uvm_page_physload: unknown physseg strategy selected!");
836 #endif
837
838 ps->start = start;
839 ps->end = end;
840 ps->avail_start = avail_start;
841 ps->avail_end = avail_end;
842 if (preload) {
843 ps->pgs = NULL;
844 } else {
845 ps->pgs = pgs;
846 ps->lastpg = pgs + npages - 1;
847 }
848 ps->free_list = free_list;
849 vm_nphysseg++;
850
851 if (!preload) {
852 uvm_page_rehash();
853 uvmpdpol_reinit();
854 }
855 }
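
/*
 * Illustrative (hypothetical) machine-dependent call, with all arguments
 * given as page frame numbers as required above; the segment bounds here
 * are placeholders supplied by the port's bootstrap code.
 *
 *	uvm_page_physload(atop(seg_start), atop(seg_end),
 *	    atop(avail_start), atop(avail_end), VM_FREELIST_DEFAULT);
 */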
856
857 /*
858 * uvm_page_rehash: reallocate hash table based on number of free pages.
859 */
860
861 void
862 uvm_page_rehash(void)
863 {
864 int freepages, lcv, bucketcount, oldcount, i;
865 struct pglist *newbuckets, *oldbuckets;
866 struct vm_page *pg;
867 size_t newsize, oldsize;
868
869 /*
870 * compute number of pages that can go in the free pool
871 */
872
873 freepages = 0;
874 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
875 freepages +=
876 (vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);
877
878 /*
879 * compute number of buckets needed for this number of pages
880 */
881
882 bucketcount = 1;
883 while (bucketcount < freepages)
884 bucketcount = bucketcount * 2;
885
886 /*
887 * compute the size of the current table and new table.
888 */
889
890 oldbuckets = uvm.page_hash;
891 oldcount = uvm.page_nhash;
892 oldsize = round_page(sizeof(struct pglist) * oldcount);
893 newsize = round_page(sizeof(struct pglist) * bucketcount);
894
895 /*
896 * allocate the new buckets
897 */
898
899 newbuckets = (struct pglist *) uvm_km_alloc(kernel_map, newsize,
900 0, UVM_KMF_WIRED);
901 if (newbuckets == NULL) {
902 printf("uvm_page_rehash: WARNING: could not grow page "
903 "hash table\n");
904 return;
905 }
906 for (lcv = 0 ; lcv < bucketcount ; lcv++)
907 TAILQ_INIT(&newbuckets[lcv]);
908
909 /*
910 * now replace the old buckets with the new ones and rehash everything
911 */
912
913 for (i = 0; i < UVM_HASHLOCK_CNT; i++)
914 mutex_spin_enter(&uvm_hashlocks[i].lock);
915
916 uvm.page_hash = newbuckets;
917 uvm.page_nhash = bucketcount;
918 uvm.page_hashmask = bucketcount - 1; /* power of 2 */
919
920 /* ... and rehash */
921 for (lcv = 0 ; lcv < oldcount ; lcv++) {
922 while ((pg = oldbuckets[lcv].tqh_first) != NULL) {
923 TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq);
924 TAILQ_INSERT_TAIL(
925 &uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)],
926 pg, hashq);
927 }
928 }
929
930 for (i = 0; i < UVM_HASHLOCK_CNT; i++)
931 mutex_spin_exit(&uvm_hashlocks[i].lock);
932
933 /*
934 * free old bucket array if is not the boot-time table
935 */
936
937 if (oldbuckets != &uvm_bootbucket)
938 uvm_km_free(kernel_map, (vaddr_t) oldbuckets, oldsize,
939 UVM_KMF_WIRED);
940 }
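
/*
 * Worked example of the sizing above: with 8192 free pages the doubling
 * loop stops at bucketcount = 8192, giving page_hashmask = 0x1fff, so a
 * hash value can be reduced to a bucket index with a single AND of the
 * mask (which is why the bucket count must stay a power of two).
 */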
941
942 /*
943 * uvm_page_recolor: Recolor the pages if the new bucket count is
944 * larger than the old one.
945 */
946
947 void
948 uvm_page_recolor(int newncolors)
949 {
950 struct pgflbucket *bucketarray, *oldbucketarray;
951 struct pgfreelist pgfl;
952 struct vm_page *pg;
953 vsize_t bucketcount;
954 int lcv, color, i, ocolors;
955
956 if (newncolors <= uvmexp.ncolors)
957 return;
958
959 if (uvm.page_init_done == false) {
960 uvmexp.ncolors = newncolors;
961 return;
962 }
963
964 bucketcount = newncolors * VM_NFREELIST;
965 bucketarray = malloc(bucketcount * sizeof(struct pgflbucket),
966 M_VMPAGE, M_NOWAIT);
967 if (bucketarray == NULL) {
968 printf("WARNING: unable to allocate %ld page color buckets\n",
969 (long) bucketcount);
970 return;
971 }
972
973 mutex_spin_enter(&uvm_fpageqlock);
974
975 /* Make sure we should still do this. */
976 if (newncolors <= uvmexp.ncolors) {
977 mutex_spin_exit(&uvm_fpageqlock);
978 free(bucketarray, M_VMPAGE);
979 return;
980 }
981
982 oldbucketarray = uvm.page_free[0].pgfl_buckets;
983 ocolors = uvmexp.ncolors;
984
985 uvmexp.ncolors = newncolors;
986 uvmexp.colormask = uvmexp.ncolors - 1;
987
988 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
989 pgfl.pgfl_buckets = (bucketarray + (lcv * newncolors));
990 uvm_page_init_buckets(&pgfl);
991 for (color = 0; color < ocolors; color++) {
992 for (i = 0; i < PGFL_NQUEUES; i++) {
993 while ((pg = TAILQ_FIRST(&uvm.page_free[
994 lcv].pgfl_buckets[color].pgfl_queues[i]))
995 != NULL) {
996 TAILQ_REMOVE(&uvm.page_free[
997 lcv].pgfl_buckets[
998 color].pgfl_queues[i], pg, pageq);
999 TAILQ_INSERT_TAIL(&pgfl.pgfl_buckets[
1000 VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
1001 i], pg, pageq);
1002 }
1003 }
1004 }
1005 uvm.page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
1006 }
1007
1008 if (have_recolored_pages) {
1009 mutex_spin_exit(&uvm_fpageqlock);
1010 free(oldbucketarray, M_VMPAGE);
1011 return;
1012 }
1013
1014 have_recolored_pages = true;
1015 mutex_spin_exit(&uvm_fpageqlock);
1016 }
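
/*
 * A sketch of how machine-dependent cache autoconfiguration might drive
 * this routine; the formula and variable names are illustrative only, not
 * taken from any particular port.  For a physically-indexed cache, the
 * number of useful colors is the cache size divided by
 * (associativity * PAGE_SIZE).
 *
 *	ncolors = l2cache_size / (l2cache_assoc * PAGE_SIZE);
 *	uvm_page_recolor(ncolors);
 */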
1017
1018 /*
1019 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat
1020 */
1021
1022 static struct vm_page *
1023 uvm_pagealloc_pgfl(struct pgfreelist *pgfl, int try1, int try2,
1024 int *trycolorp)
1025 {
1026 struct pglist *freeq;
1027 struct vm_page *pg;
1028 int color, trycolor = *trycolorp;
1029
1030 color = trycolor;
1031 do {
1032 if ((pg = TAILQ_FIRST((freeq =
1033 &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL)
1034 goto gotit;
1035 if ((pg = TAILQ_FIRST((freeq =
1036 &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL)
1037 goto gotit;
1038 color = (color + 1) & uvmexp.colormask;
1039 } while (color != trycolor);
1040
1041 return (NULL);
1042
1043 gotit:
1044 TAILQ_REMOVE(freeq, pg, pageq);
1045 uvmexp.free--;
1046
1047 /* update zero'd page count */
1048 if (pg->flags & PG_ZERO)
1049 uvmexp.zeropages--;
1050
1051 if (color == trycolor)
1052 uvmexp.colorhit++;
1053 else {
1054 uvmexp.colormiss++;
1055 *trycolorp = color;
1056 }
1057
1058 return (pg);
1059 }
1060
1061 /*
1062 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
1063 *
1064 * => return null if no pages free
1065 * => wake up pagedaemon if number of free pages drops below low water mark
1066 * => if obj != NULL, obj must be locked (to put in hash)
1067 * => if anon != NULL, anon must be locked (to put in anon)
1068 * => only one of obj or anon can be non-null
1069 * => caller must activate/deactivate page if it is not wired.
1070 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
1071 * => policy decision: it is more important to pull a page off of the
1072 * appropriate priority free list than it is to get a zero'd or
1073 * unknown contents page. This is because we live with the
1074 * consequences of a bad free list decision for the entire
1075 * lifetime of the page, e.g. if the page comes from memory that
1076 * is slower to access.
1077 */
1078
1079 struct vm_page *
1080 uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
1081 int flags, int strat, int free_list)
1082 {
1083 int lcv, try1, try2, zeroit = 0, color;
1084 struct vm_page *pg;
1085 bool use_reserve;
1086
1087 KASSERT(obj == NULL || anon == NULL);
1088 KASSERT(anon == NULL || off == 0);
1089 KASSERT(off == trunc_page(off));
1090 LOCK_ASSERT(obj == NULL || simple_lock_held(&obj->vmobjlock));
1091 LOCK_ASSERT(anon == NULL || simple_lock_held(&anon->an_lock));
1092
1093 mutex_spin_enter(&uvm_fpageqlock);
1094
1095 /*
1096 * This implements a global round-robin page coloring
1097 * algorithm.
1098 *
1099 * XXXJRT: Should we make the `nextcolor' per-CPU?
1100 * XXXJRT: What about virtually-indexed caches?
1101 */
1102
1103 color = uvm.page_free_nextcolor;
1104
1105 /*
1106 * check to see if we need to generate some free pages by waking
1107 * the pagedaemon.
1108 */
1109
1110 uvm_kick_pdaemon();
1111
1112 /*
1113 * fail if any of these conditions is true:
1114 * [1] there really are no free pages, or
1115 * [2] only kernel "reserved" pages remain and
1116 * the page isn't being allocated to a kernel object.
1117 * [3] only pagedaemon "reserved" pages remain and
1118 * the requestor isn't the pagedaemon.
1119 */
1120
1121 use_reserve = (flags & UVM_PGA_USERESERVE) ||
1122 (obj && UVM_OBJ_IS_KERN_OBJECT(obj));
1123 if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) ||
1124 (uvmexp.free <= uvmexp.reserve_pagedaemon &&
1125 !(use_reserve && curlwp == uvm.pagedaemon_lwp)))
1126 goto fail;
1127
1128 #if PGFL_NQUEUES != 2
1129 #error uvm_pagealloc_strat needs to be updated
1130 #endif
1131
1132 /*
1133 * If we want a zero'd page, try the ZEROS queue first, otherwise
1134 * we try the UNKNOWN queue first.
1135 */
1136 if (flags & UVM_PGA_ZERO) {
1137 try1 = PGFL_ZEROS;
1138 try2 = PGFL_UNKNOWN;
1139 } else {
1140 try1 = PGFL_UNKNOWN;
1141 try2 = PGFL_ZEROS;
1142 }
1143
1144 again:
1145 switch (strat) {
1146 case UVM_PGA_STRAT_NORMAL:
1147 /* Check all freelists in descending priority order. */
1148 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
1149 pg = uvm_pagealloc_pgfl(&uvm.page_free[lcv],
1150 try1, try2, &color);
1151 if (pg != NULL)
1152 goto gotit;
1153 }
1154
1155 /* No pages free! */
1156 goto fail;
1157
1158 case UVM_PGA_STRAT_ONLY:
1159 case UVM_PGA_STRAT_FALLBACK:
1160 /* Attempt to allocate from the specified free list. */
1161 KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
1162 pg = uvm_pagealloc_pgfl(&uvm.page_free[free_list],
1163 try1, try2, &color);
1164 if (pg != NULL)
1165 goto gotit;
1166
1167 /* Fall back, if possible. */
1168 if (strat == UVM_PGA_STRAT_FALLBACK) {
1169 strat = UVM_PGA_STRAT_NORMAL;
1170 goto again;
1171 }
1172
1173 /* No pages free! */
1174 goto fail;
1175
1176 default:
1177 panic("uvm_pagealloc_strat: bad strat %d", strat);
1178 /* NOTREACHED */
1179 }
1180
1181 gotit:
1182 /*
1183 * We now know which color we actually allocated from; set
1184 * the next color accordingly.
1185 */
1186
1187 uvm.page_free_nextcolor = (color + 1) & uvmexp.colormask;
1188
1189 /*
1190 * update allocation statistics and remember if we have to
1191 * zero the page
1192 */
1193
1194 if (flags & UVM_PGA_ZERO) {
1195 if (pg->flags & PG_ZERO) {
1196 uvmexp.pga_zerohit++;
1197 zeroit = 0;
1198 } else {
1199 uvmexp.pga_zeromiss++;
1200 zeroit = 1;
1201 }
1202 }
1203 mutex_spin_exit(&uvm_fpageqlock);
1204
1205 pg->offset = off;
1206 pg->uobject = obj;
1207 pg->uanon = anon;
1208 pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
1209 if (anon) {
1210 anon->an_page = pg;
1211 pg->pqflags = PQ_ANON;
1212 uvmexp.anonpages++;
1213 } else {
1214 if (obj) {
1215 uvm_pageinsert(pg);
1216 }
1217 pg->pqflags = 0;
1218 }
1219 #if defined(UVM_PAGE_TRKOWN)
1220 pg->owner_tag = NULL;
1221 #endif
1222 UVM_PAGE_OWN(pg, "new alloc");
1223
1224 if (flags & UVM_PGA_ZERO) {
1225 /*
1226 * A zero'd page is not clean. If we got a page not already
1227 * zero'd, then we have to zero it ourselves.
1228 */
1229 pg->flags &= ~PG_CLEAN;
1230 if (zeroit)
1231 pmap_zero_page(VM_PAGE_TO_PHYS(pg));
1232 }
1233
1234 return(pg);
1235
1236 fail:
1237 mutex_spin_exit(&uvm_fpageqlock);
1238 return (NULL);
1239 }
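
/*
 * Most callers do not use this function directly but go through the
 * uvm_pagealloc() wrapper, which requests the normal strategy; roughly:
 *
 *	pg = uvm_pagealloc_strat(obj, off, anon, flags,
 *	    UVM_PGA_STRAT_NORMAL, 0);
 *
 * UVM_PGA_STRAT_ONLY and UVM_PGA_STRAT_FALLBACK are for callers that must
 * (or would prefer to) allocate from one specific free list.
 */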
1240
1241 /*
1242 * uvm_pagereplace: replace a page with another
1243 *
1244 * => object must be locked
1245 */
1246
1247 void
1248 uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg)
1249 {
1250
1251 KASSERT((oldpg->flags & PG_TABLED) != 0);
1252 KASSERT(oldpg->uobject != NULL);
1253 KASSERT((newpg->flags & PG_TABLED) == 0);
1254 KASSERT(newpg->uobject == NULL);
1255 LOCK_ASSERT(simple_lock_held(&oldpg->uobject->vmobjlock));
1256
1257 newpg->uobject = oldpg->uobject;
1258 newpg->offset = oldpg->offset;
1259
1260 uvm_pageinsert_after(newpg, oldpg);
1261 uvm_pageremove(oldpg);
1262 }
1263
1264 /*
1265 * uvm_pagerealloc: reallocate a page from one object to another
1266 *
1267 * => both objects must be locked
1268 */
1269
1270 void
1271 uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
1272 {
1273 /*
1274 * remove it from the old object
1275 */
1276
1277 if (pg->uobject) {
1278 uvm_pageremove(pg);
1279 }
1280
1281 /*
1282 * put it in the new object
1283 */
1284
1285 if (newobj) {
1286 pg->uobject = newobj;
1287 pg->offset = newoff;
1288 uvm_pageinsert(pg);
1289 }
1290 }
1291
1292 #ifdef DEBUG
1293 /*
1294 * check if page is zero-filled
1295 *
1296 * - called with free page queue lock held.
1297 */
1298 void
1299 uvm_pagezerocheck(struct vm_page *pg)
1300 {
1301 int *p, *ep;
1302
1303 KASSERT(uvm_zerocheckkva != 0);
1304 KASSERT(mutex_owned(&uvm_fpageqlock));
1305
1306 /*
1307 * XXX assuming pmap_kenter_pa and pmap_kremove never call
1308 * uvm page allocator.
1309 *
1310 * it might be better to have "CPU-local temporary map" pmap interface.
1311 */
1312 pmap_kenter_pa(uvm_zerocheckkva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ);
1313 p = (int *)uvm_zerocheckkva;
1314 ep = (int *)((char *)p + PAGE_SIZE);
1315 pmap_update(pmap_kernel());
1316 while (p < ep) {
1317 if (*p != 0)
1318 panic("PG_ZERO page isn't zero-filled");
1319 p++;
1320 }
1321 pmap_kremove(uvm_zerocheckkva, PAGE_SIZE);
1322 }
1323 #endif /* DEBUG */
1324
1325 /*
1326 * uvm_pagefree: free page
1327 *
1328 * => erase page's identity (i.e. remove from hash/object)
1329 * => put page on free list
1330 * => caller must lock owning object (either anon or uvm_object)
1331 * => caller must lock page queues
1332 * => assumes all valid mappings of pg are gone
1333 */
1334
1335 void
1336 uvm_pagefree(struct vm_page *pg)
1337 {
1338 struct pglist *pgfl;
1339 bool iszero;
1340
1341 #ifdef DEBUG
1342 if (pg->uobject == (void *)0xdeadbeef &&
1343 pg->uanon == (void *)0xdeadbeef) {
1344 panic("uvm_pagefree: freeing free page %p", pg);
1345 }
1346 #endif /* DEBUG */
1347
1348 KASSERT((pg->flags & PG_PAGEOUT) == 0);
1349 LOCK_ASSERT(simple_lock_held(&uvm.pageqlock) ||
1350 !uvmpdpol_pageisqueued_p(pg));
1351 LOCK_ASSERT(pg->uobject == NULL ||
1352 simple_lock_held(&pg->uobject->vmobjlock));
1353 LOCK_ASSERT(pg->uobject != NULL || pg->uanon == NULL ||
1354 simple_lock_held(&pg->uanon->an_lock));
1355
1356 /*
1357 * if the page is loaned, resolve the loan instead of freeing.
1358 */
1359
1360 if (pg->loan_count) {
1361 KASSERT(pg->wire_count == 0);
1362
1363 /*
1364 * if the page is owned by an anon then we just want to
1365 * drop anon ownership. the kernel will free the page when
1366 * it is done with it. if the page is owned by an object,
1367 * remove it from the object and mark it dirty for the benefit
1368 * of possible anon owners.
1369 *
1370 * regardless of previous ownership, wakeup any waiters,
1371 * unbusy the page, and we're done.
1372 */
1373
1374 if (pg->uobject != NULL) {
1375 uvm_pageremove(pg);
1376 pg->flags &= ~PG_CLEAN;
1377 } else if (pg->uanon != NULL) {
1378 if ((pg->pqflags & PQ_ANON) == 0) {
1379 pg->loan_count--;
1380 } else {
1381 pg->pqflags &= ~PQ_ANON;
1382 uvmexp.anonpages--;
1383 }
1384 pg->uanon->an_page = NULL;
1385 pg->uanon = NULL;
1386 }
1387 if (pg->flags & PG_WANTED) {
1388 wakeup(pg);
1389 }
1390 pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1);
1391 #ifdef UVM_PAGE_TRKOWN
1392 pg->owner_tag = NULL;
1393 #endif
1394 if (pg->loan_count) {
1395 KASSERT(pg->uobject == NULL);
1396 if (pg->uanon == NULL) {
1397 uvm_pagedequeue(pg);
1398 }
1399 return;
1400 }
1401 }
1402
1403 /*
1404 * remove page from its object or anon.
1405 */
1406
1407 if (pg->uobject != NULL) {
1408 uvm_pageremove(pg);
1409 } else if (pg->uanon != NULL) {
1410 pg->uanon->an_page = NULL;
1411 uvmexp.anonpages--;
1412 }
1413
1414 /*
1415 * now remove the page from the queues.
1416 */
1417
1418 uvm_pagedequeue(pg);
1419
1420 /*
1421 * if the page was wired, unwire it now.
1422 */
1423
1424 if (pg->wire_count) {
1425 pg->wire_count = 0;
1426 uvmexp.wired--;
1427 }
1428
1429 /*
1430 * and put on free queue
1431 */
1432
1433 iszero = (pg->flags & PG_ZERO);
1434 pgfl = &uvm.page_free[uvm_page_lookup_freelist(pg)].
1435 pgfl_buckets[VM_PGCOLOR_BUCKET(pg)].
1436 pgfl_queues[iszero ? PGFL_ZEROS : PGFL_UNKNOWN];
1437
1438 pg->pqflags = PQ_FREE;
1439 #ifdef DEBUG
1440 pg->uobject = (void *)0xdeadbeef;
1441 pg->offset = 0xdeadbeef;
1442 pg->uanon = (void *)0xdeadbeef;
1443 #endif
1444
1445 mutex_spin_enter(&uvm_fpageqlock);
1446
1447 #ifdef DEBUG
1448 if (iszero)
1449 uvm_pagezerocheck(pg);
1450 #endif /* DEBUG */
1451
1452 TAILQ_INSERT_HEAD(pgfl, pg, pageq);
1453 uvmexp.free++;
1454 if (iszero)
1455 uvmexp.zeropages++;
1456
1457 if (uvmexp.zeropages < UVM_PAGEZERO_TARGET)
1458 uvm.page_idle_zero = vm_page_zero_enable;
1459
1460 mutex_spin_exit(&uvm_fpageqlock);
1461 }
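
/*
 * Sketch of a typical call sequence for freeing an object-owned page,
 * following the locking rules listed above (object lock plus page queue
 * lock):
 *
 *	simple_lock(&uobj->vmobjlock);
 *	simple_lock(&uvm.pageqlock);
 *	uvm_pagefree(pg);
 *	simple_unlock(&uvm.pageqlock);
 *	simple_unlock(&uobj->vmobjlock);
 */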
1462
1463 /*
1464 * uvm_page_unbusy: unbusy an array of pages.
1465 *
1466 * => pages must either all belong to the same object, or all belong to anons.
1467 * => if pages are object-owned, object must be locked.
1468 * => if pages are anon-owned, anons must be locked.
1469 * => caller must lock page queues if pages may be released.
1470 * => caller must make sure that anon-owned pages are not PG_RELEASED.
1471 */
1472
1473 void
1474 uvm_page_unbusy(struct vm_page **pgs, int npgs)
1475 {
1476 struct vm_page *pg;
1477 int i;
1478 UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);
1479
1480 for (i = 0; i < npgs; i++) {
1481 pg = pgs[i];
1482 if (pg == NULL || pg == PGO_DONTCARE) {
1483 continue;
1484 }
1485
1486 LOCK_ASSERT(pg->uobject == NULL ||
1487 simple_lock_held(&pg->uobject->vmobjlock));
1488 LOCK_ASSERT(pg->uobject != NULL ||
1489 (pg->uanon != NULL &&
1490 simple_lock_held(&pg->uanon->an_lock)));
1491
1492 KASSERT(pg->flags & PG_BUSY);
1493 KASSERT((pg->flags & PG_PAGEOUT) == 0);
1494 if (pg->flags & PG_WANTED) {
1495 wakeup(pg);
1496 }
1497 if (pg->flags & PG_RELEASED) {
1498 UVMHIST_LOG(ubchist, "releasing pg %p", pg,0,0,0);
1499 KASSERT(pg->uobject != NULL ||
1500 (pg->uanon != NULL && pg->uanon->an_ref > 0));
1501 pg->flags &= ~PG_RELEASED;
1502 uvm_pagefree(pg);
1503 } else {
1504 UVMHIST_LOG(ubchist, "unbusying pg %p", pg,0,0,0);
1505 pg->flags &= ~(PG_WANTED|PG_BUSY);
1506 UVM_PAGE_OWN(pg, NULL);
1507 }
1508 }
1509 }
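
/*
 * For example, a pager that marked an array of object pages PG_BUSY for
 * I/O would, once the I/O has completed and with the object still locked,
 * hand the whole array back in one call:
 *
 *	uvm_page_unbusy(pgs, npages);
 */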
1510
1511 #if defined(UVM_PAGE_TRKOWN)
1512 /*
1513 * uvm_page_own: set or release page ownership
1514 *
1515 * => this is a debugging function that keeps track of who sets PG_BUSY
1516 * and where they do it. it can be used to track down problems
1517 * such as a process setting "PG_BUSY" and never releasing it.
1518 * => page's object [if any] must be locked
1519 * => if "tag" is NULL then we are releasing page ownership
1520 */
1521 void
1522 uvm_page_own(struct vm_page *pg, const char *tag)
1523 {
1524 struct uvm_object *uobj;
1525 struct vm_anon *anon;
1526
1527 KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);
1528
1529 uobj = pg->uobject;
1530 anon = pg->uanon;
1531 if (uobj != NULL) {
1532 LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock));
1533 } else if (anon != NULL) {
1534 LOCK_ASSERT(simple_lock_held(&anon->an_lock));
1535 }
1536
1537 KASSERT((pg->flags & PG_WANTED) == 0);
1538
1539 /* gain ownership? */
1540 if (tag) {
1541 KASSERT((pg->flags & PG_BUSY) != 0);
1542 if (pg->owner_tag) {
1543 printf("uvm_page_own: page %p already owned "
1544 "by proc %d [%s]\n", pg,
1545 pg->owner, pg->owner_tag);
1546 panic("uvm_page_own");
1547 }
1548 pg->owner = (curproc) ? curproc->p_pid : (pid_t) -1;
1549 pg->lowner = (curlwp) ? curlwp->l_lid : (lwpid_t) -1;
1550 pg->owner_tag = tag;
1551 return;
1552 }
1553
1554 /* drop ownership */
1555 KASSERT((pg->flags & PG_BUSY) == 0);
1556 if (pg->owner_tag == NULL) {
1557 printf("uvm_page_own: dropping ownership of a non-owned "
1558 "page (%p)\n", pg);
1559 panic("uvm_page_own");
1560 }
1561 if (!uvmpdpol_pageisqueued_p(pg)) {
1562 KASSERT((pg->uanon == NULL && pg->uobject == NULL) ||
1563 pg->wire_count > 0);
1564 } else {
1565 KASSERT(pg->wire_count == 0);
1566 }
1567 pg->owner_tag = NULL;
1568 }
1569 #endif
1570
1571 /*
1572 * uvm_pageidlezero: zero free pages while the system is idle.
1573 *
1574 * => try to complete one color bucket at a time, to reduce our impact
1575 * on the CPU cache.
1576 * => we loop until we either reach the target or there is a lwp ready to run.
1577 */
1578 void
1579 uvm_pageidlezero(void)
1580 {
1581 struct vm_page *pg;
1582 struct pgfreelist *pgfl;
1583 int free_list, firstbucket;
1584 static int nextbucket;
1585
1586 KERNEL_LOCK(1, NULL);
1587 mutex_spin_enter(&uvm_fpageqlock);
1588 firstbucket = nextbucket;
1589 do {
1590 if (sched_curcpu_runnable_p()) {
1591 goto quit;
1592 }
1593 if (uvmexp.zeropages >= UVM_PAGEZERO_TARGET) {
1594 uvm.page_idle_zero = false;
1595 goto quit;
1596 }
1597 for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
1598 pgfl = &uvm.page_free[free_list];
1599 while ((pg = TAILQ_FIRST(&pgfl->pgfl_buckets[
1600 nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) {
1601 if (sched_curcpu_runnable_p())
1602 goto quit;
1603
1604 TAILQ_REMOVE(&pgfl->pgfl_buckets[
1605 nextbucket].pgfl_queues[PGFL_UNKNOWN],
1606 pg, pageq);
1607 uvmexp.free--;
1608 mutex_spin_exit(&uvm_fpageqlock);
1609 KERNEL_UNLOCK_LAST(NULL);
1610 #ifdef PMAP_PAGEIDLEZERO
1611 if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) {
1612
1613 /*
1614 * The machine-dependent code detected
1615 * some reason for us to abort zeroing
1616 * pages, probably because there is a
1617 * process now ready to run.
1618 */
1619
1620 KERNEL_LOCK(1, NULL);
1621 mutex_spin_enter(&uvm_fpageqlock);
1622 TAILQ_INSERT_HEAD(&pgfl->pgfl_buckets[
1623 nextbucket].pgfl_queues[
1624 PGFL_UNKNOWN], pg, pageq);
1625 uvmexp.free++;
1626 uvmexp.zeroaborts++;
1627 goto quit;
1628 }
1629 #else
1630 pmap_zero_page(VM_PAGE_TO_PHYS(pg));
1631 #endif /* PMAP_PAGEIDLEZERO */
1632 pg->flags |= PG_ZERO;
1633
1634 KERNEL_LOCK(1, NULL);
1635 mutex_spin_enter(&uvm_fpageqlock);
1636 TAILQ_INSERT_HEAD(&pgfl->pgfl_buckets[
1637 nextbucket].pgfl_queues[PGFL_ZEROS],
1638 pg, pageq);
1639 uvmexp.free++;
1640 uvmexp.zeropages++;
1641 }
1642 }
1643 nextbucket = (nextbucket + 1) & uvmexp.colormask;
1644 } while (nextbucket != firstbucket);
1645 quit:
1646 mutex_spin_exit(&uvm_fpageqlock);
1647 KERNEL_UNLOCK_LAST(NULL);
1648 }
1649
1650 /*
1651 * uvm_pagelookup: look up a page
1652 *
1653 * => caller should lock object to keep someone from pulling the page
1654 * out from under it
1655 */
1656
1657 struct vm_page *
1658 uvm_pagelookup(struct uvm_object *obj, voff_t off)
1659 {
1660 struct vm_page *pg;
1661 struct pglist *buck;
1662 kmutex_t *lock;
1663 u_int hash;
1664
1665 LOCK_ASSERT(simple_lock_held(&obj->vmobjlock));
1666
1667 hash = uvm_pagehash(obj, off);
1668 buck = &uvm.page_hash[hash];
1669 lock = uvm_hashlock(hash);
1670 mutex_spin_enter(lock);
1671 TAILQ_FOREACH(pg, buck, hashq) {
1672 if (pg->uobject == obj && pg->offset == off) {
1673 break;
1674 }
1675 }
1676 mutex_spin_exit(lock);
1677 KASSERT(pg == NULL || obj->uo_npages != 0);
1678 KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
1679 (pg->flags & PG_BUSY) != 0);
1680 return(pg);
1681 }
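
/*
 * Typical use, with the object locked by the caller as required:
 *
 *	pg = uvm_pagelookup(uobj, trunc_page(off));
 *	if (pg == NULL || (pg->flags & PG_BUSY) != 0) {
 *		... not resident, or busy and owned by someone else ...
 *	}
 */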
1682
1683 /*
1684 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
1685 *
1686 * => caller must lock page queues
1687 */
1688
1689 void
1690 uvm_pagewire(struct vm_page *pg)
1691 {
1692 UVM_LOCK_ASSERT_PAGEQ();
1693 #if defined(READAHEAD_STATS)
1694 if ((pg->pqflags & PQ_READAHEAD) != 0) {
1695 uvm_ra_hit.ev_count++;
1696 pg->pqflags &= ~PQ_READAHEAD;
1697 }
1698 #endif /* defined(READAHEAD_STATS) */
1699 if (pg->wire_count == 0) {
1700 uvm_pagedequeue(pg);
1701 uvmexp.wired++;
1702 }
1703 pg->wire_count++;
1704 }
1705
1706 /*
1707 * uvm_pageunwire: unwire the page.
1708 *
1709 * => activate if wire count goes to zero.
1710 * => caller must lock page queues
1711 */
1712
1713 void
1714 uvm_pageunwire(struct vm_page *pg)
1715 {
1716 UVM_LOCK_ASSERT_PAGEQ();
1717 pg->wire_count--;
1718 if (pg->wire_count == 0) {
1719 uvm_pageactivate(pg);
1720 uvmexp.wired--;
1721 }
1722 }
1723
1724 /*
1725 * uvm_pagedeactivate: deactivate page
1726 *
1727 * => caller must lock page queues
1728 * => caller must check to make sure page is not wired
1729 * => object that page belongs to must be locked (so we can adjust pg->flags)
1730 * => caller must clear the reference on the page before calling
1731 */
1732
1733 void
1734 uvm_pagedeactivate(struct vm_page *pg)
1735 {
1736
1737 UVM_LOCK_ASSERT_PAGEQ();
1738 KASSERT(pg->wire_count != 0 || uvmpdpol_pageisqueued_p(pg));
1739 uvmpdpol_pagedeactivate(pg);
1740 }
1741
1742 /*
1743 * uvm_pageactivate: activate page
1744 *
1745 * => caller must lock page queues
1746 */
1747
1748 void
1749 uvm_pageactivate(struct vm_page *pg)
1750 {
1751
1752 UVM_LOCK_ASSERT_PAGEQ();
1753 #if defined(READAHEAD_STATS)
1754 if ((pg->pqflags & PQ_READAHEAD) != 0) {
1755 uvm_ra_hit.ev_count++;
1756 pg->pqflags &= ~PQ_READAHEAD;
1757 }
1758 #endif /* defined(READAHEAD_STATS) */
1759 if (pg->wire_count != 0) {
1760 return;
1761 }
1762 uvmpdpol_pageactivate(pg);
1763 }
1764
1765 /*
1766 * uvm_pagedequeue: remove a page from any paging queue
1767 */
1768
1769 void
1770 uvm_pagedequeue(struct vm_page *pg)
1771 {
1772
1773 if (uvmpdpol_pageisqueued_p(pg)) {
1774 UVM_LOCK_ASSERT_PAGEQ();
1775 }
1776
1777 uvmpdpol_pagedequeue(pg);
1778 }
1779
1780 /*
1781 * uvm_pageenqueue: add a page to a paging queue without activating.
1782 * used where a page is not really demanded (yet). eg. read-ahead
1783 */
1784
1785 void
1786 uvm_pageenqueue(struct vm_page *pg)
1787 {
1788
1789 UVM_LOCK_ASSERT_PAGEQ();
1790 if (pg->wire_count != 0) {
1791 return;
1792 }
1793 uvmpdpol_pageenqueue(pg);
1794 }
1795
1796 /*
1797 * uvm_pagezero: zero fill a page
1798 *
1799 * => if page is part of an object then the object should be locked
1800 * to protect pg->flags.
1801 */
1802
1803 void
1804 uvm_pagezero(struct vm_page *pg)
1805 {
1806 pg->flags &= ~PG_CLEAN;
1807 pmap_zero_page(VM_PAGE_TO_PHYS(pg));
1808 }
1809
1810 /*
1811 * uvm_pagecopy: copy a page
1812 *
1813 * => if page is part of an object then the object should be locked
1814 * to protect pg->flags.
1815 */
1816
1817 void
1818 uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
1819 {
1820
1821 dst->flags &= ~PG_CLEAN;
1822 pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
1823 }
1824
1825 /*
1826 * uvm_page_lookup_freelist: look up the free list for the specified page
1827 */
1828
1829 int
1830 uvm_page_lookup_freelist(struct vm_page *pg)
1831 {
1832 int lcv;
1833
1834 lcv = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
1835 KASSERT(lcv != -1);
1836 return (vm_physmem[lcv].free_list);
1837 }
1838