1 /* $NetBSD: uvm_page.c,v 1.133 2008/06/04 12:45:28 ad Exp $ */
2
3 /*
4 * Copyright (c) 1997 Charles D. Cranor and Washington University.
5 * Copyright (c) 1991, 1993, The Regents of the University of California.
6 *
7 * All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * The Mach Operating System project at Carnegie-Mellon University.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by Charles D. Cranor,
23 * Washington University, the University of California, Berkeley and
24 * its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)vm_page.c 8.3 (Berkeley) 3/21/94
42 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
43 *
44 *
45 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
46 * All rights reserved.
47 *
48 * Permission to use, copy, modify and distribute this software and
49 * its documentation is hereby granted, provided that both the copyright
50 * notice and this permission notice appear in all copies of the
51 * software, derivative works or modified versions, and any portions
52 * thereof, and that both notices appear in supporting documentation.
53 *
54 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
55 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
56 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
57 *
58 * Carnegie Mellon requests users of this software to return to
59 *
60 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
61 * School of Computer Science
62 * Carnegie Mellon University
63 * Pittsburgh PA 15213-3890
64 *
65 * any improvements or extensions that they make and grant Carnegie the
66 * rights to redistribute these changes.
67 */
68
69 /*
70 * uvm_page.c: page ops.
71 */
72
73 #include <sys/cdefs.h>
74 __KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.133 2008/06/04 12:45:28 ad Exp $");
75
76 #include "opt_uvmhist.h"
77 #include "opt_readahead.h"
78
79 #include <sys/param.h>
80 #include <sys/systm.h>
81 #include <sys/malloc.h>
82 #include <sys/sched.h>
83 #include <sys/kernel.h>
84 #include <sys/vnode.h>
85 #include <sys/proc.h>
86 #include <sys/atomic.h>
87 #include <sys/cpu.h>
88
89 #include <uvm/uvm.h>
90 #include <uvm/uvm_pdpolicy.h>
91
92 /*
93 * global vars... XXXCDC: move to uvm. structure.
94 */
95
96 /*
97 * physical memory config is stored in vm_physmem.
98 */
99
100 struct vm_physseg vm_physmem[VM_PHYSSEG_MAX]; /* XXXCDC: uvm.physmem */
101 int vm_nphysseg = 0; /* XXXCDC: uvm.nphysseg */
102
103 /*
104 * Some supported CPUs in a given architecture don't support all
105 * of the things necessary to do idle page zero'ing efficiently.
106 * We therefore provide a way to disable it from machdep code here.
107 */
108 /*
109 * XXX disabled until we can find a way to do this without causing
110 * problems for either CPU caches or DMA latency.
111 */
112 bool vm_page_zero_enable = false;
113
114 /*
115 * local variables
116 */
117
118 /*
119 * these variables record the values returned by vm_page_bootstrap,
120 * for debugging purposes. The implementation of uvm_pageboot_alloc
121 * and pmap_startup here also uses them internally.
122 */
123
124 static vaddr_t virtual_space_start;
125 static vaddr_t virtual_space_end;
126
127 /*
128 * we use a hash table with only one bucket during bootup. we will
129 * later rehash (resize) the hash table once the allocator is ready.
130 * we statically allocate the one bootstrap bucket below...
131 */
132
133 static struct pglist uvm_bootbucket;
134
135 /*
136 * we allocate an initial number of page colors in uvm_page_init(),
137 * and remember them. We may re-color pages as cache sizes are
138 * discovered during the autoconfiguration phase. But we can never
139 * free the initial set of buckets, since they are allocated using
140 * uvm_pageboot_alloc().
141 */
142
143 static bool have_recolored_pages /* = false */;
144
145 MALLOC_DEFINE(M_VMPAGE, "VM page", "VM page");
146
147 #ifdef DEBUG
148 vaddr_t uvm_zerocheckkva;
149 #endif /* DEBUG */
150
151 /*
152 * locks on the hash table. allocated in 32 byte chunks to try
153 * and reduce cache traffic between CPUs.
154 */
155
156 #define UVM_HASHLOCK_CNT 32
157 #define uvm_hashlock(hash) \
158 (&uvm_hashlocks[(hash) & (UVM_HASHLOCK_CNT - 1)].lock)
159
160 static union {
161 kmutex_t lock;
162 uint8_t pad[32];
163 } uvm_hashlocks[UVM_HASHLOCK_CNT] __aligned(32);
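/*
 * Example (a minimal sketch, not part of the original code): how a page
 * hash value selects one of the padded locks above.  With
 * UVM_HASHLOCK_CNT == 32 only the low five bits of the hash matter, and
 * the 32-byte padding keeps each lock in its own chunk so two CPUs
 * working on different buckets do not ping-pong the same cache line.
 * The hash value here is an arbitrary illustration.
 *
 *	u_int hash = 0x2f5;			(from uvm_pagehash())
 *	kmutex_t *lock = uvm_hashlock(hash);	(0x2f5 & 31 == 21)
 *
 *	mutex_spin_enter(lock);
 *	... operate on uvm.page_hash[hash] ...
 *	mutex_spin_exit(lock);
 */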
164
165 /*
166 * local prototypes
167 */
168
169 static void uvm_pageinsert(struct vm_page *);
170 static void uvm_pageinsert_after(struct vm_page *, struct vm_page *);
171 static void uvm_pageremove(struct vm_page *);
172
173 /*
174 * inline functions
175 */
176
177 /*
178 * uvm_pageinsert: insert a page in the object and the hash table
179 * uvm_pageinsert_after: insert a page into the specified place in listq
180 *
181 * => caller must lock object
182 * => caller must lock page queues
183 * => caller should have already set pg's object and offset pointers
184 * and bumped the version counter
185 */
186
187 inline static void
188 uvm_pageinsert_after(struct vm_page *pg, struct vm_page *where)
189 {
190 struct pglist *buck;
191 struct uvm_object *uobj = pg->uobject;
192 kmutex_t *lock;
193 u_int hash;
194
195 KASSERT(mutex_owned(&uobj->vmobjlock));
196 KASSERT((pg->flags & PG_TABLED) == 0);
197 KASSERT(where == NULL || (where->flags & PG_TABLED));
198 KASSERT(where == NULL || (where->uobject == uobj));
199
200 hash = uvm_pagehash(uobj, pg->offset);
201 buck = &uvm.page_hash[hash];
202 lock = uvm_hashlock(hash);
203 mutex_spin_enter(lock);
204 TAILQ_INSERT_TAIL(buck, pg, hashq);
205 mutex_spin_exit(lock);
206
207 if (UVM_OBJ_IS_VNODE(uobj)) {
208 if (uobj->uo_npages == 0) {
209 struct vnode *vp = (struct vnode *)uobj;
210
211 vholdl(vp);
212 }
213 if (UVM_OBJ_IS_VTEXT(uobj)) {
214 atomic_inc_uint(&uvmexp.execpages);
215 } else {
216 atomic_inc_uint(&uvmexp.filepages);
217 }
218 } else if (UVM_OBJ_IS_AOBJ(uobj)) {
219 atomic_inc_uint(&uvmexp.anonpages);
220 }
221
222 if (where)
223 TAILQ_INSERT_AFTER(&uobj->memq, where, pg, listq.queue);
224 else
225 TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
226 pg->flags |= PG_TABLED;
227 uobj->uo_npages++;
228 }
229
230 inline static void
231 uvm_pageinsert(struct vm_page *pg)
232 {
233
234 uvm_pageinsert_after(pg, NULL);
235 }
236
237 /*
238 * uvm_pageremove: remove page from object and hash
239 *
240 * => caller must lock object
241 * => caller must lock page queues
242 */
243
244 static inline void
245 uvm_pageremove(struct vm_page *pg)
246 {
247 struct pglist *buck;
248 struct uvm_object *uobj = pg->uobject;
249 kmutex_t *lock;
250 u_int hash;
251
252 KASSERT(mutex_owned(&uobj->vmobjlock));
253 KASSERT(pg->flags & PG_TABLED);
254
255 hash = uvm_pagehash(uobj, pg->offset);
256 buck = &uvm.page_hash[hash];
257 lock = uvm_hashlock(hash);
258 mutex_spin_enter(lock);
259 TAILQ_REMOVE(buck, pg, hashq);
260 mutex_spin_exit(lock);
261
262 if (UVM_OBJ_IS_VNODE(uobj)) {
263 if (uobj->uo_npages == 1) {
264 struct vnode *vp = (struct vnode *)uobj;
265
266 holdrelel(vp);
267 }
268 if (UVM_OBJ_IS_VTEXT(uobj)) {
269 atomic_dec_uint(&uvmexp.execpages);
270 } else {
271 atomic_dec_uint(&uvmexp.filepages);
272 }
273 } else if (UVM_OBJ_IS_AOBJ(uobj)) {
274 atomic_dec_uint(&uvmexp.anonpages);
275 }
276
277 /* object should be locked */
278 uobj->uo_npages--;
279 TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
280 pg->flags &= ~PG_TABLED;
281 pg->uobject = NULL;
282 }
283
284 static void
285 uvm_page_init_buckets(struct pgfreelist *pgfl)
286 {
287 int color, i;
288
289 for (color = 0; color < uvmexp.ncolors; color++) {
290 for (i = 0; i < PGFL_NQUEUES; i++) {
291 LIST_INIT(&pgfl->pgfl_buckets[color].pgfl_queues[i]);
292 }
293 }
294 }
295
296 /*
297 * uvm_page_init: init the page system. called from uvm_init().
298 *
299 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
300 */
301
302 void
303 uvm_page_init(vaddr_t *kvm_startp, vaddr_t *kvm_endp)
304 {
305 vsize_t freepages, pagecount, bucketcount, n;
306 struct pgflbucket *bucketarray, *cpuarray;
307 struct vm_page *pagearray;
308 int lcv;
309 u_int i;
310 paddr_t paddr;
311
312 KASSERT(ncpu <= 1);
313 KASSERT(sizeof(pagearray->offset) >= sizeof(struct uvm_cpu *));
314
315 /*
316 * init the page queues and page queue locks, except the free
317 * list; we allocate that later (with the initial vm_page
318 * structures).
319 */
320
321 curcpu()->ci_data.cpu_uvm = &uvm.cpus[0];
322 uvmpdpol_init();
323 mutex_init(&uvm_pageqlock, MUTEX_DRIVER, IPL_NONE);
324 mutex_init(&uvm_fpageqlock, MUTEX_DRIVER, IPL_VM);
325
326 /*
327 * init the <obj,offset> => <page> hash table. for now
328 * we just have one bucket (the bootstrap bucket). later on we
329 * will allocate new buckets as we dynamically resize the hash table.
330 */
331
332 uvm.page_nhash = 1; /* 1 bucket */
333 uvm.page_hashmask = 0; /* mask for hash function */
334 uvm.page_hash = &uvm_bootbucket; /* install bootstrap bucket */
335 TAILQ_INIT(uvm.page_hash); /* init hash table */
336
337 /*
338 * init hashtable locks. these must be spinlocks, as they are
339 * taken from sites in the pmap modules where we cannot block.
340 * if taking multiple locks, the order is: low numbered first,
341 * high numbered second.
342 */
343
344 for (i = 0; i < UVM_HASHLOCK_CNT; i++)
345 mutex_init(&uvm_hashlocks[i].lock, MUTEX_SPIN, IPL_VM);
346
347 /*
348 * allocate vm_page structures.
349 */
350
351 /*
352 * sanity check:
353 * before calling this function the MD code is expected to register
354 * some free RAM with the uvm_page_physload() function. our job
355 * now is to allocate vm_page structures for this memory.
356 */
357
358 if (vm_nphysseg == 0)
359 panic("uvm_page_bootstrap: no memory pre-allocated");
360
361 /*
362 * first calculate the number of free pages...
363 *
364 * note that we use start/end rather than avail_start/avail_end.
365 * this allows us to allocate extra vm_page structures in case we
366 * want to return some memory to the pool after booting.
367 */
368
369 freepages = 0;
370 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
371 freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);
372
373 /*
374 * Let MD code initialize the number of colors, or default
375 * to 1 color if MD code doesn't care.
376 */
377 if (uvmexp.ncolors == 0)
378 uvmexp.ncolors = 1;
379 uvmexp.colormask = uvmexp.ncolors - 1;
380
381 /*
382 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
383 * use. for each page of memory we use we need a vm_page structure.
384 * thus, the total number of pages we can use is the total size of
385 * the memory divided by the PAGE_SIZE plus the size of the vm_page
386 * structure. we add one to freepages as a fudge factor to avoid
387 * truncation errors (since we can only allocate in terms of whole
388 * pages).
389 */
390
391 bucketcount = uvmexp.ncolors * VM_NFREELIST;
392 pagecount = ((freepages + 1) << PAGE_SHIFT) /
393 (PAGE_SIZE + sizeof(struct vm_page));
394
395 bucketarray = (void *)uvm_pageboot_alloc((bucketcount *
396 sizeof(struct pgflbucket) * 2) + (pagecount *
397 sizeof(struct vm_page)));
398 cpuarray = bucketarray + bucketcount;
399 pagearray = (struct vm_page *)(bucketarray + bucketcount * 2);
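/*
 * A worked example of the single boot allocation being carved up above
 * (a sketch; the sizes are assumptions, not values from any particular
 * port).  The memory returned by uvm_pageboot_alloc() is split into:
 *
 *	bucketarray	bucketcount global pgflbucket structures
 *	cpuarray	bucketcount boot-CPU pgflbucket structures
 *	pagearray	pagecount vm_page structures
 *
 * Assuming PAGE_SIZE == 4096, sizeof(struct vm_page) == 120 and
 * freepages == 100000, the formula above gives:
 *
 *	pagecount = ((100000 + 1) << 12) / (4096 + 120)
 *		  = 409604096 / 4216
 *		  ~= 97154
 *
 * i.e. roughly 3% of the raw page count is consumed by the vm_page
 * structures that describe the rest.
 */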
400
401 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
402 uvm.page_free[lcv].pgfl_buckets =
403 (bucketarray + (lcv * uvmexp.ncolors));
404 uvm_page_init_buckets(&uvm.page_free[lcv]);
405 uvm.cpus[0].page_free[lcv].pgfl_buckets =
406 (cpuarray + (lcv * uvmexp.ncolors));
407 uvm_page_init_buckets(&uvm.cpus[0].page_free[lcv]);
408 }
409 memset(pagearray, 0, pagecount * sizeof(struct vm_page));
410
411 /*
412 * init the vm_page structures and put them in the correct place.
413 */
414
415 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
416 n = vm_physmem[lcv].end - vm_physmem[lcv].start;
417
418 /* set up page array pointers */
419 vm_physmem[lcv].pgs = pagearray;
420 pagearray += n;
421 pagecount -= n;
422 vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);
423
424 /* init and free vm_pages (we've already zeroed them) */
425 paddr = ptoa(vm_physmem[lcv].start);
426 for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
427 vm_physmem[lcv].pgs[i].phys_addr = paddr;
428 #ifdef __HAVE_VM_PAGE_MD
429 VM_MDPAGE_INIT(&vm_physmem[lcv].pgs[i]);
430 #endif
431 if (atop(paddr) >= vm_physmem[lcv].avail_start &&
432 atop(paddr) <= vm_physmem[lcv].avail_end) {
433 uvmexp.npages++;
434 /* add page to free pool */
435 uvm_pagefree(&vm_physmem[lcv].pgs[i]);
436 }
437 }
438 }
439
440 /*
441 * pass up the values of virtual_space_start and
442 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
443 * layers of the VM.
444 */
445
446 *kvm_startp = round_page(virtual_space_start);
447 *kvm_endp = trunc_page(virtual_space_end);
448 #ifdef DEBUG
449 /*
450 * steal kva for uvm_pagezerocheck().
451 */
452 uvm_zerocheckkva = *kvm_startp;
453 *kvm_startp += PAGE_SIZE;
454 #endif /* DEBUG */
455
456 /*
457 * init various thresholds.
458 */
459
460 uvmexp.reserve_pagedaemon = 1;
461 uvmexp.reserve_kernel = 5;
462
463 /*
464 * determine if we should zero pages in the idle loop.
465 */
466
467 uvm.cpus[0].page_idle_zero = vm_page_zero_enable;
468
469 /*
470 * done!
471 */
472
473 uvm.page_init_done = true;
474 }
475
476 /*
477 * uvm_setpagesize: set the page size
478 *
479 * => sets page_shift and page_mask from uvmexp.pagesize.
480 */
481
482 void
483 uvm_setpagesize(void)
484 {
485
486 /*
487 * If uvmexp.pagesize is 0 at this point, we expect PAGE_SIZE
488 * to be a constant (indicated by being a non-zero value).
489 */
490 if (uvmexp.pagesize == 0) {
491 if (PAGE_SIZE == 0)
492 panic("uvm_setpagesize: uvmexp.pagesize not set");
493 uvmexp.pagesize = PAGE_SIZE;
494 }
495 uvmexp.pagemask = uvmexp.pagesize - 1;
496 if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
497 panic("uvm_setpagesize: page size not a power of two");
498 for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
499 if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
500 break;
501 }
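/*
 * Worked example of the checks above for a common configuration (the
 * page size is an assumption, not something this file requires):
 *
 *	uvmexp.pagesize  = 4096 (0x1000)
 *	uvmexp.pagemask  = 4095 (0x0fff)
 *	pagemask & pagesize == 0	-> power of two, no panic
 *	uvmexp.pageshift = 12		(1 << 12 == 4096)
 *
 * A non-power-of-two size such as 0x1800 would panic, since
 * 0x17ff & 0x1800 == 0x1000 != 0.
 */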
502
503 /*
504 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
505 */
506
507 vaddr_t
508 uvm_pageboot_alloc(vsize_t size)
509 {
510 static bool initialized = false;
511 vaddr_t addr;
512 #if !defined(PMAP_STEAL_MEMORY)
513 vaddr_t vaddr;
514 paddr_t paddr;
515 #endif
516
517 /*
518 * on first call to this function, initialize ourselves.
519 */
520 if (initialized == false) {
521 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
522
523 /* round it the way we like it */
524 virtual_space_start = round_page(virtual_space_start);
525 virtual_space_end = trunc_page(virtual_space_end);
526
527 initialized = true;
528 }
529
530 /* round to page size */
531 size = round_page(size);
532
533 #if defined(PMAP_STEAL_MEMORY)
534
535 /*
536 * defer bootstrap allocation to MD code (it may want to allocate
537 * from a direct-mapped segment). pmap_steal_memory should adjust
538 * virtual_space_start/virtual_space_end if necessary.
539 */
540
541 addr = pmap_steal_memory(size, &virtual_space_start,
542 &virtual_space_end);
543
544 return(addr);
545
546 #else /* !PMAP_STEAL_MEMORY */
547
548 /*
549 * allocate virtual memory for this request
550 */
551 if (virtual_space_start == virtual_space_end ||
552 (virtual_space_end - virtual_space_start) < size)
553 panic("uvm_pageboot_alloc: out of virtual space");
554
555 addr = virtual_space_start;
556
557 #ifdef PMAP_GROWKERNEL
558 /*
559 * If the kernel pmap can't map the requested space,
560 * then allocate more resources for it.
561 */
562 if (uvm_maxkaddr < (addr + size)) {
563 uvm_maxkaddr = pmap_growkernel(addr + size);
564 if (uvm_maxkaddr < (addr + size))
565 panic("uvm_pageboot_alloc: pmap_growkernel() failed");
566 }
567 #endif
568
569 virtual_space_start += size;
570
571 /*
572 * allocate and mapin physical pages to back new virtual pages
573 */
574
575 for (vaddr = round_page(addr) ; vaddr < addr + size ;
576 vaddr += PAGE_SIZE) {
577
578 if (!uvm_page_physget(&paddr))
579 panic("uvm_pageboot_alloc: out of memory");
580
581 /*
582 * Note this memory is no longer managed, so using
583 * pmap_kenter is safe.
584 */
585 pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
586 }
587 pmap_update(pmap_kernel());
588 return(addr);
589 #endif /* PMAP_STEAL_MEMORY */
590 }
591
592 #if !defined(PMAP_STEAL_MEMORY)
593 /*
594 * uvm_page_physget: "steal" one page from the vm_physmem structure.
595 *
596 * => attempt to allocate it off the end of a segment in which the "avail"
597 * values match the start/end values. if we can't do that, then we
598 * will advance both values (making them equal, and removing some
599 * vm_page structures from the non-avail area).
600 * => return false if out of memory.
601 */
602
603 /* subroutine: try to allocate from memory chunks on the specified freelist */
604 static bool uvm_page_physget_freelist(paddr_t *, int);
605
606 static bool
607 uvm_page_physget_freelist(paddr_t *paddrp, int freelist)
608 {
609 int lcv, x;
610
611 /* pass 1: try allocating from a matching end */
612 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
613 for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
614 #else
615 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
616 #endif
617 {
618
619 if (uvm.page_init_done == true)
620 panic("uvm_page_physget: called _after_ bootstrap");
621
622 if (vm_physmem[lcv].free_list != freelist)
623 continue;
624
625 /* try from front */
626 if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
627 vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
628 *paddrp = ptoa(vm_physmem[lcv].avail_start);
629 vm_physmem[lcv].avail_start++;
630 vm_physmem[lcv].start++;
631 /* nothing left? nuke it */
632 if (vm_physmem[lcv].avail_start ==
633 vm_physmem[lcv].end) {
634 if (vm_nphysseg == 1)
635 panic("uvm_page_physget: out of memory!");
636 vm_nphysseg--;
637 for (x = lcv ; x < vm_nphysseg ; x++)
638 /* structure copy */
639 vm_physmem[x] = vm_physmem[x+1];
640 }
641 return (true);
642 }
643
644 /* try from rear */
645 if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
646 vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
647 *paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
648 vm_physmem[lcv].avail_end--;
649 vm_physmem[lcv].end--;
650 /* nothing left? nuke it */
651 if (vm_physmem[lcv].avail_end ==
652 vm_physmem[lcv].start) {
653 if (vm_nphysseg == 1)
654 panic("uvm_page_physget: out of memory!");
655 vm_nphysseg--;
656 for (x = lcv ; x < vm_nphysseg ; x++)
657 /* structure copy */
658 vm_physmem[x] = vm_physmem[x+1];
659 }
660 return (true);
661 }
662 }
663
664 /* pass2: forget about matching ends, just allocate something */
665 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
666 for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
667 #else
668 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
669 #endif
670 {
671
672 /* any room in this bank? */
673 if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
674 continue; /* nope */
675
676 *paddrp = ptoa(vm_physmem[lcv].avail_start);
677 vm_physmem[lcv].avail_start++;
678 /* truncate! */
679 vm_physmem[lcv].start = vm_physmem[lcv].avail_start;
680
681 /* nothing left? nuke it */
682 if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
683 if (vm_nphysseg == 1)
684 panic("uvm_page_physget: out of memory!");
685 vm_nphysseg--;
686 for (x = lcv ; x < vm_nphysseg ; x++)
687 /* structure copy */
688 vm_physmem[x] = vm_physmem[x+1];
689 }
690 return (true);
691 }
692
693 return (false); /* whoops! */
694 }
695
696 bool
697 uvm_page_physget(paddr_t *paddrp)
698 {
699 int i;
700
701 /* try in the order of freelist preference */
702 for (i = 0; i < VM_NFREELIST; i++)
703 if (uvm_page_physget_freelist(paddrp, i) == true)
704 return (true);
705 return (false);
706 }
707 #endif /* PMAP_STEAL_MEMORY */
708
709 /*
710 * uvm_page_physload: load physical memory into VM system
711 *
712 * => all args are PFs
713 * => all pages in start/end get vm_page structures
714 * => areas marked by avail_start/avail_end get added to the free page pool
715 * => we are limited to VM_PHYSSEG_MAX physical memory segments
716 */
717
718 void
719 uvm_page_physload(paddr_t start, paddr_t end, paddr_t avail_start,
720 paddr_t avail_end, int free_list)
721 {
722 int preload, lcv;
723 psize_t npages;
724 struct vm_page *pgs;
725 struct vm_physseg *ps;
726
727 if (uvmexp.pagesize == 0)
728 panic("uvm_page_physload: page size not set!");
729 if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
730 panic("uvm_page_physload: bad free list %d", free_list);
731 if (start >= end)
732 panic("uvm_page_physload: start >= end");
733
734 /*
735 * do we have room?
736 */
737
738 if (vm_nphysseg == VM_PHYSSEG_MAX) {
739 printf("uvm_page_physload: unable to load physical memory "
740 "segment\n");
741 printf("\t%d segments allocated, ignoring 0x%llx -> 0x%llx\n",
742 VM_PHYSSEG_MAX, (long long)start, (long long)end);
743 printf("\tincrease VM_PHYSSEG_MAX\n");
744 return;
745 }
746
747 /*
748 * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
749 * called yet, so malloc is not available).
750 */
751
752 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
753 if (vm_physmem[lcv].pgs)
754 break;
755 }
756 preload = (lcv == vm_nphysseg);
757
758 /*
759 * if VM is already running, attempt to malloc() vm_page structures
760 */
761
762 if (!preload) {
763 #if defined(VM_PHYSSEG_NOADD)
764 panic("uvm_page_physload: tried to add RAM after vm_mem_init");
765 #else
766 /* XXXCDC: need some sort of lockout for this case */
767 paddr_t paddr;
768 npages = end - start; /* # of pages */
769 pgs = malloc(sizeof(struct vm_page) * npages,
770 M_VMPAGE, M_NOWAIT);
771 if (pgs == NULL) {
772 printf("uvm_page_physload: can not malloc vm_page "
773 "structs for segment\n");
774 printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
775 return;
776 }
777 /* zero data, init phys_addr and free_list, and free pages */
778 memset(pgs, 0, sizeof(struct vm_page) * npages);
779 for (lcv = 0, paddr = ptoa(start) ;
780 lcv < npages ; lcv++, paddr += PAGE_SIZE) {
781 pgs[lcv].phys_addr = paddr;
782 pgs[lcv].free_list = free_list;
783 if (atop(paddr) >= avail_start &&
784 atop(paddr) <= avail_end)
785 uvm_pagefree(&pgs[lcv]);
786 }
787 /* XXXCDC: incomplete: need to update uvmexp.free, what else? */
788 /* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
789 #endif
790 } else {
791 pgs = NULL;
792 npages = 0;
793 }
794
795 /*
796 * now insert us in the proper place in vm_physmem[]
797 */
798
799 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
800 /* random: put it at the end (easy!) */
801 ps = &vm_physmem[vm_nphysseg];
802 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
803 {
804 int x;
805 /* sort by address for binary search */
806 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
807 if (start < vm_physmem[lcv].start)
808 break;
809 ps = &vm_physmem[lcv];
810 /* move back other entries, if necessary ... */
811 for (x = vm_nphysseg ; x > lcv ; x--)
812 /* structure copy */
813 vm_physmem[x] = vm_physmem[x - 1];
814 }
815 #elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
816 {
817 int x;
818 /* sort by largest segment first */
819 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
820 if ((end - start) >
821 (vm_physmem[lcv].end - vm_physmem[lcv].start))
822 break;
823 ps = &vm_physmem[lcv];
824 /* move back other entries, if necessary ... */
825 for (x = vm_nphysseg ; x > lcv ; x--)
826 /* structure copy */
827 vm_physmem[x] = vm_physmem[x - 1];
828 }
829 #else
830 panic("uvm_page_physload: unknown physseg strategy selected!");
831 #endif
832
833 ps->start = start;
834 ps->end = end;
835 ps->avail_start = avail_start;
836 ps->avail_end = avail_end;
837 if (preload) {
838 ps->pgs = NULL;
839 } else {
840 ps->pgs = pgs;
841 ps->lastpg = pgs + npages - 1;
842 }
843 ps->free_list = free_list;
844 vm_nphysseg++;
845
846 if (!preload) {
847 uvm_page_rehash();
848 uvmpdpol_reinit();
849 }
850 }
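/*
 * Usage sketch (illustrative only; the addresses and free list are
 * invented, real ports pass values discovered from firmware or the
 * bootloader): machine-dependent startup code registers each RAM
 * segment like this, with all arguments given as page frame numbers,
 * before uvm_page_init() runs.
 *
 *	paddr_t first = atop(0x00800000);	(start of the segment)
 *	paddr_t last  = atop(0x20000000);	(end of the segment)
 *
 *	uvm_page_physload(first, last, first, last, VM_FREELIST_DEFAULT);
 */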
851
852 /*
853 * uvm_page_rehash: reallocate hash table based on number of free pages.
854 */
855
856 void
857 uvm_page_rehash(void)
858 {
859 int freepages, lcv, bucketcount, oldcount, i;
860 struct pglist *newbuckets, *oldbuckets;
861 struct vm_page *pg;
862 size_t newsize, oldsize;
863
864 /*
865 * compute number of pages that can go in the free pool
866 */
867
868 freepages = 0;
869 for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
870 freepages +=
871 (vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);
872
873 /*
874 * compute number of buckets needed for this number of pages
875 */
876
877 bucketcount = 1;
878 while (bucketcount < freepages)
879 bucketcount = bucketcount * 2;
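	/*
	 * e.g. (an assumed figure, for illustration): with
	 * freepages == 300000 the loop above stops at
	 * bucketcount == 524288 (2^19), the smallest power of two
	 * that is >= freepages, which keeps the hash mask computed
	 * below valid.
	 */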
880
881 /*
882 * compute the size of the current table and new table.
883 */
884
885 oldbuckets = uvm.page_hash;
886 oldcount = uvm.page_nhash;
887 oldsize = round_page(sizeof(struct pglist) * oldcount);
888 newsize = round_page(sizeof(struct pglist) * bucketcount);
889
890 /*
891 * allocate the new buckets
892 */
893
894 newbuckets = (struct pglist *) uvm_km_alloc(kernel_map, newsize,
895 0, UVM_KMF_WIRED);
896 if (newbuckets == NULL) {
897 printf("uvm_page_physrehash: WARNING: could not grow page "
898 "hash table\n");
899 return;
900 }
901 for (lcv = 0 ; lcv < bucketcount ; lcv++)
902 TAILQ_INIT(&newbuckets[lcv]);
903
904 /*
905 * now replace the old buckets with the new ones and rehash everything
906 */
907
908 for (i = 0; i < UVM_HASHLOCK_CNT; i++)
909 mutex_spin_enter(&uvm_hashlocks[i].lock);
910
911 uvm.page_hash = newbuckets;
912 uvm.page_nhash = bucketcount;
913 uvm.page_hashmask = bucketcount - 1; /* power of 2 */
914
915 /* ... and rehash */
916 for (lcv = 0 ; lcv < oldcount ; lcv++) {
917 while ((pg = oldbuckets[lcv].tqh_first) != NULL) {
918 TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq);
919 TAILQ_INSERT_TAIL(
920 &uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)],
921 pg, hashq);
922 }
923 }
924
925 for (i = 0; i < UVM_HASHLOCK_CNT; i++)
926 mutex_spin_exit(&uvm_hashlocks[i].lock);
927
928 /*
929 * free old bucket array if it is not the boot-time table
930 */
931
932 if (oldbuckets != &uvm_bootbucket)
933 uvm_km_free(kernel_map, (vaddr_t) oldbuckets, oldsize,
934 UVM_KMF_WIRED);
935 }
936
937 /*
938 * uvm_page_recolor: Recolor the pages if the new bucket count is
939 * larger than the old one.
940 */
941
942 void
943 uvm_page_recolor(int newncolors)
944 {
945 struct pgflbucket *bucketarray, *cpuarray, *oldbucketarray;
946 struct pgfreelist gpgfl, pgfl;
947 struct vm_page *pg;
948 vsize_t bucketcount;
949 int lcv, color, i, ocolors;
950 struct uvm_cpu *ucpu;
951
952 if (newncolors <= uvmexp.ncolors)
953 return;
954
955 if (uvm.page_init_done == false) {
956 uvmexp.ncolors = newncolors;
957 return;
958 }
959
960 bucketcount = newncolors * VM_NFREELIST;
961 bucketarray = malloc(bucketcount * sizeof(struct pgflbucket) * 2,
962 M_VMPAGE, M_NOWAIT);
963 cpuarray = bucketarray + bucketcount;
964 if (bucketarray == NULL) {
965 printf("WARNING: unable to allocate %ld page color buckets\n",
966 (long) bucketcount);
967 return;
968 }
969
970 mutex_spin_enter(&uvm_fpageqlock);
971
972 /* Make sure we should still do this. */
973 if (newncolors <= uvmexp.ncolors) {
974 mutex_spin_exit(&uvm_fpageqlock);
975 free(bucketarray, M_VMPAGE);
976 return;
977 }
978
979 oldbucketarray = uvm.page_free[0].pgfl_buckets;
980 ocolors = uvmexp.ncolors;
981
982 uvmexp.ncolors = newncolors;
983 uvmexp.colormask = uvmexp.ncolors - 1;
984
985 ucpu = curcpu()->ci_data.cpu_uvm;
986 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
987 gpgfl.pgfl_buckets = (bucketarray + (lcv * newncolors));
988 pgfl.pgfl_buckets = (cpuarray + (lcv * uvmexp.ncolors));
989 uvm_page_init_buckets(&gpgfl);
990 uvm_page_init_buckets(&pgfl);
991 for (color = 0; color < ocolors; color++) {
992 for (i = 0; i < PGFL_NQUEUES; i++) {
993 while ((pg = LIST_FIRST(&uvm.page_free[
994 lcv].pgfl_buckets[color].pgfl_queues[i]))
995 != NULL) {
996 LIST_REMOVE(pg, pageq.list); /* global */
997 LIST_REMOVE(pg, listq.list); /* cpu */
998 LIST_INSERT_HEAD(&gpgfl.pgfl_buckets[
999 VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
1000 i], pg, pageq.list);
1001 LIST_INSERT_HEAD(&pgfl.pgfl_buckets[
1002 VM_PGCOLOR_BUCKET(pg)].pgfl_queues[
1003 i], pg, listq.list);
1004 }
1005 }
1006 }
1007 uvm.page_free[lcv].pgfl_buckets = gpgfl.pgfl_buckets;
1008 ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
1009 }
1010
1011 if (have_recolored_pages) {
1012 mutex_spin_exit(&uvm_fpageqlock);
1013 free(oldbucketarray, M_VMPAGE);
1014 return;
1015 }
1016
1017 have_recolored_pages = true;
1018 mutex_spin_exit(&uvm_fpageqlock);
1019 }
1020
1021 /*
1022 * uvm_cpu_attach: initialize per-CPU data structures.
1023 */
1024
1025 void
1026 uvm_cpu_attach(struct cpu_info *ci)
1027 {
1028 struct pgflbucket *bucketarray;
1029 struct pgfreelist pgfl;
1030 struct uvm_cpu *ucpu;
1031 vsize_t bucketcount;
1032 int lcv;
1033
1034 if (CPU_IS_PRIMARY(ci)) {
1035 /* Already done in uvm_page_init(). */
1036 return;
1037 }
1038
1039 bucketcount = uvmexp.ncolors * VM_NFREELIST;
1040 bucketarray = malloc(bucketcount * sizeof(struct pgflbucket),
1041 M_VMPAGE, M_WAITOK);
1042 ucpu = &uvm.cpus[cpu_index(ci)];
1043 ci->ci_data.cpu_uvm = ucpu;
1044 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
1045 pgfl.pgfl_buckets = (bucketarray + (lcv * uvmexp.ncolors));
1046 uvm_page_init_buckets(&pgfl);
1047 ucpu->page_free[lcv].pgfl_buckets = pgfl.pgfl_buckets;
1048 }
1049 }
1050
1051 /*
1052 * uvm_pagealloc_pgfl: helper routine for uvm_pagealloc_strat
1053 */
1054
1055 static struct vm_page *
1056 uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, int flist, int try1, int try2,
1057 int *trycolorp)
1058 {
1059 struct pgflist *freeq;
1060 struct vm_page *pg;
1061 int color, trycolor = *trycolorp;
1062 struct pgfreelist *gpgfl, *pgfl;
1063
1064 KASSERT(mutex_owned(&uvm_fpageqlock));
1065
1066 color = trycolor;
1067
1068 pgfl = &ucpu->page_free[flist];
1069 gpgfl = &uvm.page_free[flist];
1070 do {
1071 /* cpu, try1 */
1072 if ((pg = LIST_FIRST((freeq =
1073 &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) {
1074 VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--;
1075 uvmexp.cpuhit++;
1076 goto gotit;
1077 }
1078 /* global, try1 */
1079 if ((pg = LIST_FIRST((freeq =
1080 &gpgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) {
1081 VM_FREE_PAGE_TO_CPU(pg)->pages[try1]--;
1082 uvmexp.cpumiss++;
1083 goto gotit;
1084 }
1085 /* cpu, try2 */
1086 if ((pg = LIST_FIRST((freeq =
1087 &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) {
1088 VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--;
1089 uvmexp.cpuhit++;
1090 goto gotit;
1091 }
1092 /* global, try2 */
1093 if ((pg = LIST_FIRST((freeq =
1094 &gpgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) {
1095 VM_FREE_PAGE_TO_CPU(pg)->pages[try2]--;
1096 uvmexp.cpumiss++;
1097 goto gotit;
1098 }
1099 color = (color + 1) & uvmexp.colormask;
1100 } while (color != trycolor);
1101
1102 return (NULL);
1103
1104 gotit:
1105 LIST_REMOVE(pg, pageq.list); /* global list */
1106 LIST_REMOVE(pg, listq.list); /* per-cpu list */
1107 uvmexp.free--;
1108
1109 /* update zero'd page count */
1110 if (pg->flags & PG_ZERO)
1111 uvmexp.zeropages--;
1112
1113 if (color == trycolor)
1114 uvmexp.colorhit++;
1115 else {
1116 uvmexp.colormiss++;
1117 *trycolorp = color;
1118 }
1119
1120 return (pg);
1121 }
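/*
 * A sketch of the search order above, assuming uvmexp.ncolors == 4 and
 * an initial trycolor of 2 (both values are illustrative).  The loop
 * visits colors 2, 3, 0, 1 and within each color tries, in order:
 *
 *	per-CPU try1, global try1, per-CPU try2, global try2
 *
 * so the preferred queue of the preferred color always wins when it has
 * a page, and colorhit/colormiss record whether the caller ended up
 * with the color it asked for.
 */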
1122
1123 /*
1124 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
1125 *
1126 * => return null if no pages free
1127 * => wake up pagedaemon if number of free pages drops below low water mark
1128 * => if obj != NULL, obj must be locked (to put in obj's hash)
1129 * => if anon != NULL, anon must be locked (to put in anon)
1130 * => only one of obj or anon can be non-null
1131 * => caller must activate/deactivate page if it is not wired.
1132 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
1133 * => policy decision: it is more important to pull a page off of the
1134 * appropriate priority free list than it is to get a zero'd or
1135 * unknown contents page. This is because we live with the
1136 * consequences of a bad free list decision for the entire
1137 * lifetime of the page, e.g. if the page comes from memory that
1138 * is slower to access.
1139 */
1140
1141 struct vm_page *
1142 uvm_pagealloc_strat(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
1143 int flags, int strat, int free_list)
1144 {
1145 int lcv, try1, try2, zeroit = 0, color;
1146 struct uvm_cpu *ucpu;
1147 struct vm_page *pg;
1148 bool use_reserve;
1149
1150 KASSERT(obj == NULL || anon == NULL);
1151 KASSERT(anon == NULL || off == 0);
1152 KASSERT(off == trunc_page(off));
1153 KASSERT(obj == NULL || mutex_owned(&obj->vmobjlock));
1154 KASSERT(anon == NULL || mutex_owned(&anon->an_lock));
1155
1156 mutex_spin_enter(&uvm_fpageqlock);
1157
1158 /*
1159 * This implements a global round-robin page coloring
1160 * algorithm.
1161 *
1162 * XXXJRT: What about virtually-indexed caches?
1163 */
1164
1165 ucpu = curcpu()->ci_data.cpu_uvm;
1166 color = ucpu->page_free_nextcolor;
1167
1168 /*
1169 * check to see if we need to generate some free pages by waking
1170 * the pagedaemon.
1171 */
1172
1173 uvm_kick_pdaemon();
1174
1175 /*
1176 * fail if any of these conditions is true:
1177 * [1] there really are no free pages, or
1178 * [2] only kernel "reserved" pages remain and
1179 * the page isn't being allocated to a kernel object.
1180 * [3] only pagedaemon "reserved" pages remain and
1181 * the requestor isn't the pagedaemon.
1182 */
1183
1184 use_reserve = (flags & UVM_PGA_USERESERVE) ||
1185 (obj && UVM_OBJ_IS_KERN_OBJECT(obj));
1186 if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) ||
1187 (uvmexp.free <= uvmexp.reserve_pagedaemon &&
1188 !(use_reserve && curlwp == uvm.pagedaemon_lwp)))
1189 goto fail;
1190
1191 #if PGFL_NQUEUES != 2
1192 #error uvm_pagealloc_strat needs to be updated
1193 #endif
1194
1195 /*
1196 * If we want a zero'd page, try the ZEROS queue first, otherwise
1197 * we try the UNKNOWN queue first.
1198 */
1199 if (flags & UVM_PGA_ZERO) {
1200 try1 = PGFL_ZEROS;
1201 try2 = PGFL_UNKNOWN;
1202 } else {
1203 try1 = PGFL_UNKNOWN;
1204 try2 = PGFL_ZEROS;
1205 }
1206
1207 again:
1208 switch (strat) {
1209 case UVM_PGA_STRAT_NORMAL:
1210 /* Check all freelists in descending priority order. */
1211 for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
1212 pg = uvm_pagealloc_pgfl(ucpu, lcv,
1213 try1, try2, &color);
1214 if (pg != NULL)
1215 goto gotit;
1216 }
1217
1218 /* No pages free! */
1219 goto fail;
1220
1221 case UVM_PGA_STRAT_ONLY:
1222 case UVM_PGA_STRAT_FALLBACK:
1223 /* Attempt to allocate from the specified free list. */
1224 KASSERT(free_list >= 0 && free_list < VM_NFREELIST);
1225 pg = uvm_pagealloc_pgfl(ucpu, free_list,
1226 try1, try2, &color);
1227 if (pg != NULL)
1228 goto gotit;
1229
1230 /* Fall back, if possible. */
1231 if (strat == UVM_PGA_STRAT_FALLBACK) {
1232 strat = UVM_PGA_STRAT_NORMAL;
1233 goto again;
1234 }
1235
1236 /* No pages free! */
1237 goto fail;
1238
1239 default:
1240 panic("uvm_pagealloc_strat: bad strat %d", strat);
1241 /* NOTREACHED */
1242 }
1243
1244 gotit:
1245 /*
1246 * We now know which color we actually allocated from; set
1247 * the next color accordingly.
1248 */
1249
1250 ucpu->page_free_nextcolor = (color + 1) & uvmexp.colormask;
1251
1252 /*
1253 * update allocation statistics and remember if we have to
1254 * zero the page
1255 */
1256
1257 if (flags & UVM_PGA_ZERO) {
1258 if (pg->flags & PG_ZERO) {
1259 uvmexp.pga_zerohit++;
1260 zeroit = 0;
1261 } else {
1262 uvmexp.pga_zeromiss++;
1263 zeroit = 1;
1264 }
1265 if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) {
1266 ucpu->page_idle_zero = vm_page_zero_enable;
1267 }
1268 }
1269 mutex_spin_exit(&uvm_fpageqlock);
1270
1271 pg->offset = off;
1272 pg->uobject = obj;
1273 pg->uanon = anon;
1274 pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
1275 if (anon) {
1276 anon->an_page = pg;
1277 pg->pqflags = PQ_ANON;
1278 atomic_inc_uint(&uvmexp.anonpages);
1279 } else {
1280 if (obj) {
1281 uvm_pageinsert(pg);
1282 }
1283 pg->pqflags = 0;
1284 }
1285 #if defined(UVM_PAGE_TRKOWN)
1286 pg->owner_tag = NULL;
1287 #endif
1288 UVM_PAGE_OWN(pg, "new alloc");
1289
1290 if (flags & UVM_PGA_ZERO) {
1291 /*
1292 * A zero'd page is not clean. If we got a page not already
1293 * zero'd, then we have to zero it ourselves.
1294 */
1295 pg->flags &= ~PG_CLEAN;
1296 if (zeroit)
1297 pmap_zero_page(VM_PAGE_TO_PHYS(pg));
1298 }
1299
1300 return(pg);
1301
1302 fail:
1303 mutex_spin_exit(&uvm_fpageqlock);
1304 return (NULL);
1305 }
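/*
 * Usage sketch (illustrative; "uobj", "off" and the wait message are
 * placeholders, and uvm_pagealloc() is the usual wrapper that invokes
 * this function with UVM_PGA_STRAT_NORMAL): allocate a zero-filled
 * page for a locked object, sleeping for the pagedaemon when memory is
 * tight, as most non-pagedaemon callers do.
 *
 *	struct vm_page *pg;
 *
 *	for (;;) {
 *		mutex_enter(&uobj->vmobjlock);
 *		pg = uvm_pagealloc(uobj, off, NULL, UVM_PGA_ZERO);
 *		if (pg != NULL)
 *			break;
 *		mutex_exit(&uobj->vmobjlock);
 *		uvm_wait("pgalloc");
 *	}
 *	... pg is returned PG_BUSY|PG_FAKE and zero-filled ...
 *	mutex_exit(&uobj->vmobjlock);
 */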
1306
1307 /*
1308 * uvm_pagereplace: replace a page with another
1309 *
1310 * => object must be locked
1311 */
1312
1313 void
1314 uvm_pagereplace(struct vm_page *oldpg, struct vm_page *newpg)
1315 {
1316
1317 KASSERT((oldpg->flags & PG_TABLED) != 0);
1318 KASSERT(oldpg->uobject != NULL);
1319 KASSERT((newpg->flags & PG_TABLED) == 0);
1320 KASSERT(newpg->uobject == NULL);
1321 KASSERT(mutex_owned(&oldpg->uobject->vmobjlock));
1322
1323 newpg->uobject = oldpg->uobject;
1324 newpg->offset = oldpg->offset;
1325
1326 uvm_pageinsert_after(newpg, oldpg);
1327 uvm_pageremove(oldpg);
1328 }
1329
1330 /*
1331 * uvm_pagerealloc: reallocate a page from one object to another
1332 *
1333 * => both objects must be locked
1334 */
1335
1336 void
1337 uvm_pagerealloc(struct vm_page *pg, struct uvm_object *newobj, voff_t newoff)
1338 {
1339 /*
1340 * remove it from the old object
1341 */
1342
1343 if (pg->uobject) {
1344 uvm_pageremove(pg);
1345 }
1346
1347 /*
1348 * put it in the new object
1349 */
1350
1351 if (newobj) {
1352 pg->uobject = newobj;
1353 pg->offset = newoff;
1354 uvm_pageinsert(pg);
1355 }
1356 }
1357
1358 #ifdef DEBUG
1359 /*
1360 * check if page is zero-filled
1361 *
1362 * - called with free page queue lock held.
1363 */
1364 void
1365 uvm_pagezerocheck(struct vm_page *pg)
1366 {
1367 int *p, *ep;
1368
1369 KASSERT(uvm_zerocheckkva != 0);
1370 KASSERT(mutex_owned(&uvm_fpageqlock));
1371
1372 /*
1373 * XXX assuming pmap_kenter_pa and pmap_kremove never call
1374 * uvm page allocator.
1375 *
1376 * it might be better to have "CPU-local temporary map" pmap interface.
1377 */
1378 pmap_kenter_pa(uvm_zerocheckkva, VM_PAGE_TO_PHYS(pg), VM_PROT_READ);
1379 p = (int *)uvm_zerocheckkva;
1380 ep = (int *)((char *)p + PAGE_SIZE);
1381 pmap_update(pmap_kernel());
1382 while (p < ep) {
1383 if (*p != 0)
1384 panic("PG_ZERO page isn't zero-filled");
1385 p++;
1386 }
1387 pmap_kremove(uvm_zerocheckkva, PAGE_SIZE);
1388 /*
1389 * pmap_update() is not necessary here because no one except us
1390 * uses this VA.
1391 */
1392 }
1393 #endif /* DEBUG */
1394
1395 /*
1396 * uvm_pagefree: free page
1397 *
1398 * => erase page's identity (i.e. remove from object)
1399 * => put page on free list
1400 * => caller must lock owning object (either anon or uvm_object)
1401 * => caller must lock page queues
1402 * => assumes all valid mappings of pg are gone
1403 */
1404
1405 void
1406 uvm_pagefree(struct vm_page *pg)
1407 {
1408 struct pgflist *pgfl;
1409 struct uvm_cpu *ucpu;
1410 int index, color, queue;
1411 bool iszero;
1412
1413 #ifdef DEBUG
1414 if (pg->uobject == (void *)0xdeadbeef &&
1415 pg->uanon == (void *)0xdeadbeef) {
1416 panic("uvm_pagefree: freeing free page %p", pg);
1417 }
1418 #endif /* DEBUG */
1419
1420 KASSERT((pg->flags & PG_PAGEOUT) == 0);
1421 KASSERT(mutex_owned(&uvm_pageqlock) || !uvmpdpol_pageisqueued_p(pg));
1422 KASSERT(pg->uobject == NULL || mutex_owned(&pg->uobject->vmobjlock));
1423 KASSERT(pg->uobject != NULL || pg->uanon == NULL ||
1424 mutex_owned(&pg->uanon->an_lock));
1425
1426 /*
1427 * if the page is loaned, resolve the loan instead of freeing.
1428 */
1429
1430 if (pg->loan_count) {
1431 KASSERT(pg->wire_count == 0);
1432
1433 /*
1434 * if the page is owned by an anon then we just want to
1435 * drop anon ownership. the kernel will free the page when
1436 * it is done with it. if the page is owned by an object,
1437 * remove it from the object and mark it dirty for the benefit
1438 * of possible anon owners.
1439 *
1440 * regardless of previous ownership, wakeup any waiters,
1441 * unbusy the page, and we're done.
1442 */
1443
1444 if (pg->uobject != NULL) {
1445 uvm_pageremove(pg);
1446 pg->flags &= ~PG_CLEAN;
1447 } else if (pg->uanon != NULL) {
1448 if ((pg->pqflags & PQ_ANON) == 0) {
1449 pg->loan_count--;
1450 } else {
1451 pg->pqflags &= ~PQ_ANON;
1452 atomic_dec_uint(&uvmexp.anonpages);
1453 }
1454 pg->uanon->an_page = NULL;
1455 pg->uanon = NULL;
1456 }
1457 if (pg->flags & PG_WANTED) {
1458 wakeup(pg);
1459 }
1460 pg->flags &= ~(PG_WANTED|PG_BUSY|PG_RELEASED|PG_PAGER1);
1461 #ifdef UVM_PAGE_TRKOWN
1462 pg->owner_tag = NULL;
1463 #endif
1464 if (pg->loan_count) {
1465 KASSERT(pg->uobject == NULL);
1466 if (pg->uanon == NULL) {
1467 uvm_pagedequeue(pg);
1468 }
1469 return;
1470 }
1471 }
1472
1473 /*
1474 * remove page from its object or anon.
1475 */
1476
1477 if (pg->uobject != NULL) {
1478 uvm_pageremove(pg);
1479 } else if (pg->uanon != NULL) {
1480 pg->uanon->an_page = NULL;
1481 atomic_dec_uint(&uvmexp.anonpages);
1482 }
1483
1484 /*
1485 * now remove the page from the queues.
1486 */
1487
1488 uvm_pagedequeue(pg);
1489
1490 /*
1491 * if the page was wired, unwire it now.
1492 */
1493
1494 if (pg->wire_count) {
1495 pg->wire_count = 0;
1496 uvmexp.wired--;
1497 }
1498
1499 /*
1500 * and put on free queue
1501 */
1502
1503 iszero = (pg->flags & PG_ZERO);
1504 index = uvm_page_lookup_freelist(pg);
1505 color = VM_PGCOLOR_BUCKET(pg);
1506 queue = (iszero ? PGFL_ZEROS : PGFL_UNKNOWN);
1507
1508 pg->pqflags = PQ_FREE;
1509 #ifdef DEBUG
1510 pg->uobject = (void *)0xdeadbeef;
1511 pg->uanon = (void *)0xdeadbeef;
1512 #endif
1513
1514 mutex_spin_enter(&uvm_fpageqlock);
1515
1516 #ifdef DEBUG
1517 if (iszero)
1518 uvm_pagezerocheck(pg);
1519 #endif /* DEBUG */
1520
1521
1522 /* global list */
1523 pgfl = &uvm.page_free[index].pgfl_buckets[color].pgfl_queues[queue];
1524 LIST_INSERT_HEAD(pgfl, pg, pageq.list);
1525 uvmexp.free++;
1526 if (iszero) {
1527 uvmexp.zeropages++;
1528 }
1529
1530 /* per-cpu list */
1531 ucpu = curcpu()->ci_data.cpu_uvm;
1532 pg->offset = (uintptr_t)ucpu;
1533 pgfl = &ucpu->page_free[index].pgfl_buckets[color].pgfl_queues[queue];
1534 LIST_INSERT_HEAD(pgfl, pg, listq.list);
1535 ucpu->pages[queue]++;
1536 if (ucpu->pages[PGFL_ZEROS] < ucpu->pages[PGFL_UNKNOWN]) {
1537 ucpu->page_idle_zero = vm_page_zero_enable;
1538 }
1539
1540 mutex_spin_exit(&uvm_fpageqlock);
1541 }
1542
1543 /*
1544 * uvm_page_unbusy: unbusy an array of pages.
1545 *
1546 * => pages must either all belong to the same object, or all belong to anons.
1547 * => if pages are object-owned, object must be locked.
1548 * => if pages are anon-owned, anons must be locked.
1549 * => caller must lock page queues if pages may be released.
1550 * => caller must make sure that anon-owned pages are not PG_RELEASED.
1551 */
1552
1553 void
1554 uvm_page_unbusy(struct vm_page **pgs, int npgs)
1555 {
1556 struct vm_page *pg;
1557 int i;
1558 UVMHIST_FUNC("uvm_page_unbusy"); UVMHIST_CALLED(ubchist);
1559
1560 for (i = 0; i < npgs; i++) {
1561 pg = pgs[i];
1562 if (pg == NULL || pg == PGO_DONTCARE) {
1563 continue;
1564 }
1565
1566 KASSERT(pg->uobject == NULL ||
1567 mutex_owned(&pg->uobject->vmobjlock));
1568 KASSERT(pg->uobject != NULL ||
1569 (pg->uanon != NULL && mutex_owned(&pg->uanon->an_lock)));
1570
1571 KASSERT(pg->flags & PG_BUSY);
1572 KASSERT((pg->flags & PG_PAGEOUT) == 0);
1573 if (pg->flags & PG_WANTED) {
1574 wakeup(pg);
1575 }
1576 if (pg->flags & PG_RELEASED) {
1577 UVMHIST_LOG(ubchist, "releasing pg %p", pg,0,0,0);
1578 KASSERT(pg->uobject != NULL ||
1579 (pg->uanon != NULL && pg->uanon->an_ref > 0));
1580 pg->flags &= ~PG_RELEASED;
1581 uvm_pagefree(pg);
1582 } else {
1583 UVMHIST_LOG(ubchist, "unbusying pg %p", pg,0,0,0);
1584 pg->flags &= ~(PG_WANTED|PG_BUSY);
1585 UVM_PAGE_OWN(pg, NULL);
1586 }
1587 }
1588 }
1589
1590 #if defined(UVM_PAGE_TRKOWN)
1591 /*
1592 * uvm_page_own: set or release page ownership
1593 *
1594 * => this is a debugging function that keeps track of who sets PG_BUSY
1595 * and where they do it. it can be used to track down problems
1596 * such as a process setting "PG_BUSY" and never releasing it.
1597 * => page's object [if any] must be locked
1598 * => if "tag" is NULL then we are releasing page ownership
1599 */
1600 void
1601 uvm_page_own(struct vm_page *pg, const char *tag)
1602 {
1603 struct uvm_object *uobj;
1604 struct vm_anon *anon;
1605
1606 KASSERT((pg->flags & (PG_PAGEOUT|PG_RELEASED)) == 0);
1607
1608 uobj = pg->uobject;
1609 anon = pg->uanon;
1610 if (uobj != NULL) {
1611 KASSERT(mutex_owned(&uobj->vmobjlock));
1612 } else if (anon != NULL) {
1613 KASSERT(mutex_owned(&anon->an_lock));
1614 }
1615
1616 KASSERT((pg->flags & PG_WANTED) == 0);
1617
1618 /* gain ownership? */
1619 if (tag) {
1620 KASSERT((pg->flags & PG_BUSY) != 0);
1621 if (pg->owner_tag) {
1622 printf("uvm_page_own: page %p already owned "
1623 "by proc %d [%s]\n", pg,
1624 pg->owner, pg->owner_tag);
1625 panic("uvm_page_own");
1626 }
1627 pg->owner = (curproc) ? curproc->p_pid : (pid_t) -1;
1628 pg->lowner = (curlwp) ? curlwp->l_lid : (lwpid_t) -1;
1629 pg->owner_tag = tag;
1630 return;
1631 }
1632
1633 /* drop ownership */
1634 KASSERT((pg->flags & PG_BUSY) == 0);
1635 if (pg->owner_tag == NULL) {
1636 printf("uvm_page_own: dropping ownership of a non-owned "
1637 "page (%p)\n", pg);
1638 panic("uvm_page_own");
1639 }
1640 if (!uvmpdpol_pageisqueued_p(pg)) {
1641 KASSERT((pg->uanon == NULL && pg->uobject == NULL) ||
1642 pg->wire_count > 0);
1643 } else {
1644 KASSERT(pg->wire_count == 0);
1645 }
1646 pg->owner_tag = NULL;
1647 }
1648 #endif
1649
1650 /*
1651 * uvm_pageidlezero: zero free pages while the system is idle.
1652 *
1653 * => try to complete one color bucket at a time, to reduce our impact
1654 * on the CPU cache.
1655 * => we loop until we either reach the target or there is a lwp ready
1656 * to run, or MD code detects a reason to break early.
1657 */
1658 void
1659 uvm_pageidlezero(void)
1660 {
1661 struct vm_page *pg;
1662 struct pgfreelist *pgfl, *gpgfl;
1663 struct uvm_cpu *ucpu;
1664 int free_list, firstbucket, nextbucket;
1665
1666 ucpu = curcpu()->ci_data.cpu_uvm;
1667 if (!ucpu->page_idle_zero ||
1668 ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {
1669 ucpu->page_idle_zero = false;
1670 return;
1671 }
1672 mutex_enter(&uvm_fpageqlock);
1673 firstbucket = ucpu->page_free_nextcolor;
1674 nextbucket = firstbucket;
1675 do {
1676 if (sched_curcpu_runnable_p()) {
1677 break;
1678 }
1679 for (free_list = 0; free_list < VM_NFREELIST; free_list++) {
1680 pgfl = &ucpu->page_free[free_list];
1681 gpgfl = &uvm.page_free[free_list];
1682 while ((pg = LIST_FIRST(&pgfl->pgfl_buckets[
1683 nextbucket].pgfl_queues[PGFL_UNKNOWN])) != NULL) {
1684 if (sched_curcpu_runnable_p()) {
1685 goto quit;
1686 }
1687 LIST_REMOVE(pg, pageq.list); /* global list */
1688 LIST_REMOVE(pg, listq.list); /* per-cpu list */
1689 ucpu->pages[PGFL_UNKNOWN]--;
1690 uvmexp.free--;
1691 mutex_spin_exit(&uvm_fpageqlock);
1692 #ifdef PMAP_PAGEIDLEZERO
1693 if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) {
1694
1695 /*
1696 * The machine-dependent code detected
1697 * some reason for us to abort zeroing
1698 * pages, probably because there is a
1699 * process now ready to run.
1700 */
1701
1702 mutex_spin_enter(&uvm_fpageqlock);
1703 LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[
1704 nextbucket].pgfl_queues[
1705 PGFL_UNKNOWN], pg, pageq.list);
1706 LIST_INSERT_HEAD(&pgfl->pgfl_buckets[
1707 nextbucket].pgfl_queues[
1708 PGFL_UNKNOWN], pg, listq.list);
1709 ucpu->pages[PGFL_UNKNOWN]++;
1710 uvmexp.free++;
1711 uvmexp.zeroaborts++;
1712 goto quit;
1713 }
1714 #else
1715 pmap_zero_page(VM_PAGE_TO_PHYS(pg));
1716 #endif /* PMAP_PAGEIDLEZERO */
1717 pg->flags |= PG_ZERO;
1718
1719 mutex_spin_enter(&uvm_fpageqlock);
1720 LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[
1721 nextbucket].pgfl_queues[PGFL_ZEROS],
1722 pg, pageq.list);
1723 LIST_INSERT_HEAD(&pgfl->pgfl_buckets[
1724 nextbucket].pgfl_queues[PGFL_ZEROS],
1725 pg, listq.list);
1726 ucpu->pages[PGFL_ZEROS]++;
1727 uvmexp.free++;
1728 uvmexp.zeropages++;
1729 }
1730 }
1731 if (ucpu->pages[PGFL_UNKNOWN] < uvmexp.ncolors) {
1732 break;
1733 }
1734 nextbucket = (nextbucket + 1) & uvmexp.colormask;
1735 } while (nextbucket != firstbucket);
1736 ucpu->page_idle_zero = false;
1737 quit:
1738 mutex_spin_exit(&uvm_fpageqlock);
1739 }
1740
1741 /*
1742 * uvm_pagelookup: look up a page
1743 *
1744 * => caller should lock object to keep someone from pulling the page
1745 * out from under it
1746 */
1747
1748 struct vm_page *
1749 uvm_pagelookup(struct uvm_object *obj, voff_t off)
1750 {
1751 struct vm_page *pg;
1752 struct pglist *buck;
1753 kmutex_t *lock;
1754 u_int hash;
1755
1756 KASSERT(mutex_owned(&obj->vmobjlock));
1757
1758 hash = uvm_pagehash(obj, off);
1759 buck = &uvm.page_hash[hash];
1760 lock = uvm_hashlock(hash);
1761 mutex_spin_enter(lock);
1762 TAILQ_FOREACH(pg, buck, hashq) {
1763 if (pg->uobject == obj && pg->offset == off) {
1764 break;
1765 }
1766 }
1767 mutex_spin_exit(lock);
1768 KASSERT(pg == NULL || obj->uo_npages != 0);
1769 KASSERT(pg == NULL || (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
1770 (pg->flags & PG_BUSY) != 0);
1771 return(pg);
1772 }
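/*
 * Usage sketch (illustrative; "uobj" and "off" are placeholders): the
 * object lock must be held across the lookup and for as long as the
 * result is used, and a page found busy still belongs to whoever set
 * PG_BUSY.
 *
 *	mutex_enter(&uobj->vmobjlock);
 *	pg = uvm_pagelookup(uobj, off);
 *	if (pg != NULL && (pg->flags & PG_BUSY) == 0) {
 *		... examine or modify the page ...
 *	}
 *	mutex_exit(&uobj->vmobjlock);
 */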
1773
1774 /*
1775 * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
1776 *
1777 * => caller must lock page queues
1778 */
1779
1780 void
1781 uvm_pagewire(struct vm_page *pg)
1782 {
1783 KASSERT(mutex_owned(&uvm_pageqlock));
1784 #if defined(READAHEAD_STATS)
1785 if ((pg->pqflags & PQ_READAHEAD) != 0) {
1786 uvm_ra_hit.ev_count++;
1787 pg->pqflags &= ~PQ_READAHEAD;
1788 }
1789 #endif /* defined(READAHEAD_STATS) */
1790 if (pg->wire_count == 0) {
1791 uvm_pagedequeue(pg);
1792 uvmexp.wired++;
1793 }
1794 pg->wire_count++;
1795 }
1796
1797 /*
1798 * uvm_pageunwire: unwire the page.
1799 *
1800 * => activate if wire count goes to zero.
1801 * => caller must lock page queues
1802 */
1803
1804 void
1805 uvm_pageunwire(struct vm_page *pg)
1806 {
1807 KASSERT(mutex_owned(&uvm_pageqlock));
1808 pg->wire_count--;
1809 if (pg->wire_count == 0) {
1810 uvm_pageactivate(pg);
1811 uvmexp.wired--;
1812 }
1813 }
1814
1815 /*
1816 * uvm_pagedeactivate: deactivate page
1817 *
1818 * => caller must lock page queues
1819 * => caller must check to make sure page is not wired
1820 * => object that page belongs to must be locked (so we can adjust pg->flags)
1821 * => caller must clear the reference on the page before calling
1822 */
1823
1824 void
1825 uvm_pagedeactivate(struct vm_page *pg)
1826 {
1827
1828 KASSERT(mutex_owned(&uvm_pageqlock));
1829 KASSERT(pg->wire_count != 0 || uvmpdpol_pageisqueued_p(pg));
1830 uvmpdpol_pagedeactivate(pg);
1831 }
1832
1833 /*
1834 * uvm_pageactivate: activate page
1835 *
1836 * => caller must lock page queues
1837 */
1838
1839 void
1840 uvm_pageactivate(struct vm_page *pg)
1841 {
1842
1843 KASSERT(mutex_owned(&uvm_pageqlock));
1844 #if defined(READAHEAD_STATS)
1845 if ((pg->pqflags & PQ_READAHEAD) != 0) {
1846 uvm_ra_hit.ev_count++;
1847 pg->pqflags &= ~PQ_READAHEAD;
1848 }
1849 #endif /* defined(READAHEAD_STATS) */
1850 if (pg->wire_count != 0) {
1851 return;
1852 }
1853 uvmpdpol_pageactivate(pg);
1854 }
1855
1856 /*
1857 * uvm_pagedequeue: remove a page from any paging queue
1858 */
1859
1860 void
1861 uvm_pagedequeue(struct vm_page *pg)
1862 {
1863
1864 if (uvmpdpol_pageisqueued_p(pg)) {
1865 KASSERT(mutex_owned(&uvm_pageqlock));
1866 }
1867
1868 uvmpdpol_pagedequeue(pg);
1869 }
1870
1871 /*
1872 * uvm_pageenqueue: add a page to a paging queue without activating.
1873 * used where a page is not really demanded (yet). eg. read-ahead
1874 */
1875
1876 void
1877 uvm_pageenqueue(struct vm_page *pg)
1878 {
1879
1880 KASSERT(mutex_owned(&uvm_pageqlock));
1881 if (pg->wire_count != 0) {
1882 return;
1883 }
1884 uvmpdpol_pageenqueue(pg);
1885 }
1886
1887 /*
1888 * uvm_pagezero: zero fill a page
1889 *
1890 * => if page is part of an object then the object should be locked
1891 * to protect pg->flags.
1892 */
1893
1894 void
1895 uvm_pagezero(struct vm_page *pg)
1896 {
1897 pg->flags &= ~PG_CLEAN;
1898 pmap_zero_page(VM_PAGE_TO_PHYS(pg));
1899 }
1900
1901 /*
1902 * uvm_pagecopy: copy a page
1903 *
1904 * => if page is part of an object then the object should be locked
1905 * to protect pg->flags.
1906 */
1907
1908 void
1909 uvm_pagecopy(struct vm_page *src, struct vm_page *dst)
1910 {
1911
1912 dst->flags &= ~PG_CLEAN;
1913 pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
1914 }
1915
1916 /*
1917 * uvm_page_lookup_freelist: look up the free list for the specified page
1918 */
1919
1920 int
1921 uvm_page_lookup_freelist(struct vm_page *pg)
1922 {
1923 int lcv;
1924
1925 lcv = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
1926 KASSERT(lcv != -1);
1927 return (vm_physmem[lcv].free_list);
1928 }
1929