/*	$NetBSD: uvm_page.c,v 1.32 2000/04/02 20:39:18 thorpej Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor,
 *      Washington University, the University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_page.c   8.3 (Berkeley) 3/21/94
 * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_page.c: page ops.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#define UVM_PAGE                /* pull in uvm_page.h functions */
#include <uvm/uvm.h>

/*
 * global vars... XXXCDC: move to uvm. structure.
 */

/*
 * physical memory config is stored in vm_physmem.
 */

struct vm_physseg vm_physmem[VM_PHYSSEG_MAX];	/* XXXCDC: uvm.physmem */
int vm_nphysseg = 0;				/* XXXCDC: uvm.nphysseg */

/*
 * local variables
 */

/*
 * these variables record the values returned by vm_page_bootstrap,
 * for debugging purposes.  The implementation of uvm_pageboot_alloc
 * and pmap_startup here also uses them internally.
 */

static vaddr_t      virtual_space_start;
static vaddr_t      virtual_space_end;

/*
 * we use a hash table with only one bucket during bootup.  we will
 * later rehash (resize) the hash table once the allocator is ready.
 * we static allocate the one bootstrap bucket below...
 */

static struct pglist uvm_bootbucket;

/*
 * local prototypes
 */

static void uvm_pageinsert __P((struct vm_page *));


/*
 * inline functions
 */

/*
 * uvm_pageinsert: insert a page in the object and the hash table
 *
 * => caller must lock object
 * => caller must lock page queues
 * => caller should have already set pg's object and offset pointers
 *    and bumped the version counter
 */

__inline static void
uvm_pageinsert(pg)
	struct vm_page *pg;
{
	struct pglist *buck;
	int s;

#ifdef DIAGNOSTIC
	if (pg->flags & PG_TABLED)
		panic("uvm_pageinsert: already inserted");
#endif

	buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
	s = splimp();
	simple_lock(&uvm.hashlock);
	TAILQ_INSERT_TAIL(buck, pg, hashq);	/* put in hash */
	simple_unlock(&uvm.hashlock);
	splx(s);

	TAILQ_INSERT_TAIL(&pg->uobject->memq, pg, listq); /* put in object */
	pg->flags |= PG_TABLED;
	pg->uobject->uo_npages++;

}

/*
 * uvm_pageremove: remove page from object and hash
 *
 * => caller must lock object
 * => caller must lock page queues
 */

void __inline
uvm_pageremove(pg)
	struct vm_page *pg;
{
	struct pglist *buck;
	int s;

#ifdef DIAGNOSTIC
	if ((pg->flags & (PG_FAULTING)) != 0)
		panic("uvm_pageremove: page is faulting");
#endif

	if ((pg->flags & PG_TABLED) == 0)
		return;				/* XXX: log */

	buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
	s = splimp();
	simple_lock(&uvm.hashlock);
	TAILQ_REMOVE(buck, pg, hashq);
	simple_unlock(&uvm.hashlock);
	splx(s);

	/* object should be locked */
	TAILQ_REMOVE(&pg->uobject->memq, pg, listq);

	pg->flags &= ~PG_TABLED;
	pg->uobject->uo_npages--;
	pg->uobject = NULL;
	pg->version++;

}

/*
 * uvm_page_init: init the page system.   called from uvm_init().
 *
 * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
 */

void
uvm_page_init(kvm_startp, kvm_endp)
	vaddr_t *kvm_startp, *kvm_endp;
{
	vsize_t freepages, pagecount, n;
	vm_page_t pagearray;
	int lcv, i;
	paddr_t paddr;


	/*
	 * step 1: init the page queues and page queue locks
	 */
	for (lcv = 0; lcv < VM_NFREELIST; lcv++)
		TAILQ_INIT(&uvm.page_free[lcv]);
	TAILQ_INIT(&uvm.page_active);
	TAILQ_INIT(&uvm.page_inactive_swp);
	TAILQ_INIT(&uvm.page_inactive_obj);
	simple_lock_init(&uvm.pageqlock);
	simple_lock_init(&uvm.fpageqlock);

	/*
	 * step 2: init the <obj,offset> => <page> hash table.  for now
	 * we just have one bucket (the bootstrap bucket).  later on we
	 * will allocate new buckets as we dynamically resize the hash table.
	 */

	uvm.page_nhash = 1;			/* 1 bucket */
	uvm.page_hashmask = 0;			/* mask for hash function */
	uvm.page_hash = &uvm_bootbucket;	/* install bootstrap bucket */
	TAILQ_INIT(uvm.page_hash);		/* init hash table */
	simple_lock_init(&uvm.hashlock);	/* init hash table lock */

	/*
	 * step 3: allocate vm_page structures.
	 */

	/*
	 * sanity check:
	 * before calling this function the MD code is expected to register
	 * some free RAM with the uvm_page_physload() function.   our job
	 * now is to allocate vm_page structures for this memory.
	 */

	if (vm_nphysseg == 0)
		panic("vm_page_bootstrap: no memory pre-allocated");

	/*
	 * first calculate the number of free pages...
	 *
	 * note that we use start/end rather than avail_start/avail_end.
	 * this allows us to allocate extra vm_page structures in case we
	 * want to return some memory to the pool after booting.
	 */

	freepages = 0;
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
		freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);

	/*
	 * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
	 * use.   for each page of memory we use we need a vm_page structure.
	 * thus, the total number of pages we can use is the total size of
	 * the memory divided by the PAGE_SIZE plus the size of the vm_page
	 * structure.   we add one to freepages as a fudge factor to avoid
	 * truncation errors (since we can only allocate in terms of whole
	 * pages).
	 */
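
	/*
	 * worked example with assumed numbers (no particular port):
	 * with PAGE_SIZE = 4096, sizeof(struct vm_page) = 64 and
	 * freepages = 16384 (64MB of RAM),
	 *
	 *	pagecount = ((16384 + 1) << 12) / (4096 + 64) = 16133
	 *
	 * so roughly 251 of the 16384 physical pages end up holding the
	 * vm_page array itself and the rest remain usable page frames.
	 */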

	pagecount = ((freepages + 1) << PAGE_SHIFT) /
	    (PAGE_SIZE + sizeof(struct vm_page));
	pagearray = (vm_page_t)uvm_pageboot_alloc(pagecount *
	    sizeof(struct vm_page));
	memset(pagearray, 0, pagecount * sizeof(struct vm_page));

	/*
	 * step 4: init the vm_page structures and put them in the correct
	 * place...
	 */

	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {

		n = vm_physmem[lcv].end - vm_physmem[lcv].start;
		if (n > pagecount) {
			printf("uvm_page_init: lost %ld page(s) in init\n",
			    (long)(n - pagecount));
			panic("uvm_page_init");  /* XXXCDC: shouldn't happen? */
			/* n = pagecount; */
		}
		/* set up page array pointers */
		vm_physmem[lcv].pgs = pagearray;
		pagearray += n;
		pagecount -= n;
		vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);

		/* init and free vm_pages (we've already zeroed them) */
		paddr = ptoa(vm_physmem[lcv].start);
		for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
			vm_physmem[lcv].pgs[i].phys_addr = paddr;
			if (atop(paddr) >= vm_physmem[lcv].avail_start &&
			    atop(paddr) <= vm_physmem[lcv].avail_end) {
				uvmexp.npages++;
				/* add page to free pool */
				uvm_pagefree(&vm_physmem[lcv].pgs[i]);
			}
		}
	}
	/*
	 * step 5: pass up the values of virtual_space_start and
	 * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
	 * layers of the VM.
	 */

	*kvm_startp = round_page(virtual_space_start);
	*kvm_endp = trunc_page(virtual_space_end);

	/*
	 * step 6: init pagedaemon lock
	 */

	simple_lock_init(&uvm.pagedaemon_lock);

	/*
	 * step 7: init reserve thresholds
	 * XXXCDC - values may need adjusting
	 */
	uvmexp.reserve_pagedaemon = 1;
	uvmexp.reserve_kernel = 5;

	/*
	 * done!
	 */

	uvm.page_init_done = TRUE;
}

/*
 * uvm_setpagesize: set the page size
 *
 * => sets page_shift and page_mask from uvmexp.pagesize.
 * => XXXCDC: move global vars.
 */
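
/*
 * worked example (assumed value): a port with uvmexp.pagesize = 0x1000
 * (4096) gets pagemask = 0xfff and pageshift = 12.  the
 * (pagemask & pagesize) test below rejects sizes that are not powers of
 * two, e.g. 0x1800 gives pagemask 0x17ff and 0x17ff & 0x1800 == 0x1000
 * != 0, which triggers the panic.
 */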

void
uvm_setpagesize()
{
	if (uvmexp.pagesize == 0)
		uvmexp.pagesize = DEFAULT_PAGE_SIZE;
	uvmexp.pagemask = uvmexp.pagesize - 1;
	if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
		panic("uvm_setpagesize: page size not a power of two");
	for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
		if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
			break;
}

/*
 * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
 */
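
/*
 * illustrative sketch (the variable name and size are hypothetical, not
 * taken from this file): early machine-dependent bootstrap code
 * typically grabs wired, unmanaged memory this way, e.g. for the kernel
 * message buffer:
 *
 *	vaddr_t msgbuf_va;
 *
 *	msgbuf_va = uvm_pageboot_alloc(MSGBUFSIZE);
 *
 * the returned KVA is already backed by physical pages and mapped
 * read/write, and it is never returned to the page pool.
 */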

vaddr_t
uvm_pageboot_alloc(size)
	vsize_t size;
{
#if defined(PMAP_STEAL_MEMORY)
	vaddr_t addr;

	/*
	 * defer bootstrap allocation to MD code (it may want to allocate
	 * from a direct-mapped segment).  pmap_steal_memory should round
	 * off virtual_space_start/virtual_space_end.
	 */

	addr = pmap_steal_memory(size, &virtual_space_start,
	    &virtual_space_end);

	return(addr);

#else /* !PMAP_STEAL_MEMORY */

	static boolean_t initialized = FALSE;
	vaddr_t addr, vaddr;
	paddr_t paddr;

	/* round to page size */
	size = round_page(size);

	/*
	 * on first call to this function, initialize ourselves.
	 */
	if (initialized == FALSE) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/* round it the way we like it */
		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);

		initialized = TRUE;
	}

	/*
	 * allocate virtual memory for this request
	 */
	if (virtual_space_start == virtual_space_end ||
	    (virtual_space_end - virtual_space_start) < size)
		panic("uvm_pageboot_alloc: out of virtual space");

	addr = virtual_space_start;

#ifdef PMAP_GROWKERNEL
	/*
	 * If the kernel pmap can't map the requested space,
	 * then allocate more resources for it.
	 */
	if (uvm_maxkaddr < (addr + size)) {
		uvm_maxkaddr = pmap_growkernel(addr + size);
		if (uvm_maxkaddr < (addr + size))
			panic("uvm_pageboot_alloc: pmap_growkernel() failed");
	}
#endif

	virtual_space_start += size;

	/*
	 * allocate and mapin physical pages to back new virtual pages
	 */

	for (vaddr = round_page(addr) ; vaddr < addr + size ;
	    vaddr += PAGE_SIZE) {

		if (!uvm_page_physget(&paddr))
			panic("uvm_pageboot_alloc: out of memory");

		/*
		 * Note this memory is no longer managed, so using
		 * pmap_kenter is safe.
		 */
		pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
	}
	return(addr);
#endif	/* PMAP_STEAL_MEMORY */
}

#if !defined(PMAP_STEAL_MEMORY)
/*
 * uvm_page_physget: "steal" one page from the vm_physmem structure.
 *
 * => attempt to allocate it off the end of a segment in which the "avail"
 *    values match the start/end values.   if we can't do that, then we
 *    will advance both values (making them equal, and removing some
 *    vm_page structures from the non-avail area).
 * => return false if out of memory.
 */

/* subroutine: try to allocate from memory chunks on the specified freelist */
static boolean_t uvm_page_physget_freelist __P((paddr_t *, int));

static boolean_t
uvm_page_physget_freelist(paddrp, freelist)
	paddr_t *paddrp;
	int freelist;
{
	int lcv, x;

	/* pass 1: try allocating from a matching end */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
#else
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
#endif
	{

		if (uvm.page_init_done == TRUE)
			panic("vm_page_physget: called _after_ bootstrap");

		if (vm_physmem[lcv].free_list != freelist)
			continue;

		/* try from front */
		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
			*paddrp = ptoa(vm_physmem[lcv].avail_start);
			vm_physmem[lcv].avail_start++;
			vm_physmem[lcv].start++;
			/* nothing left?   nuke it */
			if (vm_physmem[lcv].avail_start ==
			    vm_physmem[lcv].end) {
				if (vm_nphysseg == 1)
					panic("vm_page_physget: out of memory!");
				vm_nphysseg--;
				for (x = lcv ; x < vm_nphysseg ; x++)
					/* structure copy */
					vm_physmem[x] = vm_physmem[x+1];
			}
			return (TRUE);
		}

		/* try from rear */
		if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
		    vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
			*paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
			vm_physmem[lcv].avail_end--;
			vm_physmem[lcv].end--;
			/* nothing left?   nuke it */
			if (vm_physmem[lcv].avail_end ==
			    vm_physmem[lcv].start) {
				if (vm_nphysseg == 1)
					panic("vm_page_physget: out of memory!");
				vm_nphysseg--;
				for (x = lcv ; x < vm_nphysseg ; x++)
					/* structure copy */
					vm_physmem[x] = vm_physmem[x+1];
			}
			return (TRUE);
		}
	}

	/* pass2: forget about matching ends, just allocate something */
#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
	for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
#else
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
#endif
	{

		/* any room in this bank? */
		if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
			continue;  /* nope */

		*paddrp = ptoa(vm_physmem[lcv].avail_start);
		vm_physmem[lcv].avail_start++;
		/* truncate! */
		vm_physmem[lcv].start = vm_physmem[lcv].avail_start;

		/* nothing left?   nuke it */
		if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
			if (vm_nphysseg == 1)
				panic("vm_page_physget: out of memory!");
			vm_nphysseg--;
			for (x = lcv ; x < vm_nphysseg ; x++)
				/* structure copy */
				vm_physmem[x] = vm_physmem[x+1];
		}
		return (TRUE);
	}

	return (FALSE);        /* whoops! */
}

boolean_t
uvm_page_physget(paddrp)
	paddr_t *paddrp;
{
	int i;

	/* try in the order of freelist preference */
	for (i = 0; i < VM_NFREELIST; i++)
		if (uvm_page_physget_freelist(paddrp, i) == TRUE)
			return (TRUE);
	return (FALSE);
}
#endif /* PMAP_STEAL_MEMORY */

/*
 * uvm_page_physload: load physical memory into VM system
 *
 * => all args are PFs (page frame numbers)
 * => all pages in start/end get vm_page structures
 * => areas marked by avail_start/avail_end get added to the free page pool
 * => we are limited to VM_PHYSSEG_MAX physical memory segments
 */
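
/*
 * usage sketch (the symbol names are hypothetical, not from any
 * particular port): a machine-dependent pmap_bootstrap() normally
 * registers its RAM with something like
 *
 *	uvm_page_physload(atop(seg_start), atop(seg_end),
 *	    atop(avail_start), atop(avail_end), VM_FREELIST_DEFAULT);
 *
 * where the first pair covers every page that needs a vm_page structure
 * and the second pair names the subrange that may actually be handed to
 * the free page pool.
 */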

void
uvm_page_physload(start, end, avail_start, avail_end, free_list)
	paddr_t start, end, avail_start, avail_end;
	int free_list;
{
	int preload, lcv;
	psize_t npages;
	struct vm_page *pgs;
	struct vm_physseg *ps;

	if (uvmexp.pagesize == 0)
		panic("vm_page_physload: page size not set!");

	if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
		panic("uvm_page_physload: bad free list %d\n", free_list);

	if (start >= end)
		panic("uvm_page_physload: start >= end");

	/*
	 * do we have room?
	 */
	if (vm_nphysseg == VM_PHYSSEG_MAX) {
		printf("vm_page_physload: unable to load physical memory "
		    "segment\n");
		printf("\t%d segments allocated, ignoring 0x%lx -> 0x%lx\n",
		    VM_PHYSSEG_MAX, start, end);
		return;
	}

	/*
	 * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
	 * called yet, so malloc is not available).
	 */
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
		if (vm_physmem[lcv].pgs)
			break;
	}
	preload = (lcv == vm_nphysseg);

	/*
	 * if VM is already running, attempt to malloc() vm_page structures
	 */
	if (!preload) {
#if defined(VM_PHYSSEG_NOADD)
		panic("vm_page_physload: tried to add RAM after vm_mem_init");
#else
		/* XXXCDC: need some sort of lockout for this case */
		paddr_t paddr;
		npages = end - start;	/* # of pages */
		MALLOC(pgs, struct vm_page *, sizeof(struct vm_page) * npages,
		    M_VMPAGE, M_NOWAIT);
		if (pgs == NULL) {
			printf("vm_page_physload: can not malloc vm_page "
			    "structs for segment\n");
			printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
			return;
		}
		/* zero data, init phys_addr and free_list, and free pages */
		memset(pgs, 0, sizeof(struct vm_page) * npages);
		for (lcv = 0, paddr = ptoa(start) ;
		    lcv < npages ; lcv++, paddr += PAGE_SIZE) {
			pgs[lcv].phys_addr = paddr;
			pgs[lcv].free_list = free_list;
			if (atop(paddr) >= avail_start &&
			    atop(paddr) <= avail_end)
				uvm_pagefree(&pgs[lcv]);
		}
		/* XXXCDC: incomplete: need to update uvmexp.free, what else? */
		/* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
#endif
	} else {

		/* gcc complains if these don't get init'd */
		pgs = NULL;
		npages = 0;

	}

	/*
	 * now insert us in the proper place in vm_physmem[]
	 */

#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)

	/* random: put it at the end (easy!) */
	ps = &vm_physmem[vm_nphysseg];

#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)

	{
		int x;
		/* sort by address for binary search */
		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
			if (start < vm_physmem[lcv].start)
				break;
		ps = &vm_physmem[lcv];
		/* move back other entries, if necessary ... */
		for (x = vm_nphysseg ; x > lcv ; x--)
			/* structure copy */
			vm_physmem[x] = vm_physmem[x - 1];
	}

#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)

	{
		int x;
		/* sort by largest segment first */
		for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
			if ((end - start) >
			    (vm_physmem[lcv].end - vm_physmem[lcv].start))
				break;
		ps = &vm_physmem[lcv];
		/* move back other entries, if necessary ... */
		for (x = vm_nphysseg ; x > lcv ; x--)
			/* structure copy */
			vm_physmem[x] = vm_physmem[x - 1];
	}

#else

	panic("vm_page_physload: unknown physseg strategy selected!");

#endif

	ps->start = start;
	ps->end = end;
	ps->avail_start = avail_start;
	ps->avail_end = avail_end;
	if (preload) {
		ps->pgs = NULL;
	} else {
		ps->pgs = pgs;
		ps->lastpg = pgs + npages - 1;
	}
	ps->free_list = free_list;
	vm_nphysseg++;

	/*
	 * done!
	 */

	if (!preload)
		uvm_page_rehash();

	return;
}

/*
 * uvm_page_rehash: reallocate hash table based on number of free pages.
 */

void
uvm_page_rehash()
{
	int freepages, lcv, bucketcount, s, oldcount;
	struct pglist *newbuckets, *oldbuckets;
	struct vm_page *pg;
	size_t newsize, oldsize;

	/*
	 * compute number of pages that can go in the free pool
	 */

	freepages = 0;
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
		freepages +=
		    (vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);

	/*
	 * compute number of buckets needed for this number of pages
	 */

	bucketcount = 1;
	while (bucketcount < freepages)
		bucketcount = bucketcount * 2;
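
	/*
	 * worked example (assumed numbers): with freepages = 100000 the
	 * loop above stops at bucketcount = 131072 (2^17), the smallest
	 * power of two >= freepages, so on average each hash bucket
	 * holds less than one page and the chains stay short.
	 */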

	/*
	 * compute the size of the current table and new table.
	 */

	oldbuckets = uvm.page_hash;
	oldcount = uvm.page_nhash;
	oldsize = round_page(sizeof(struct pglist) * oldcount);
	newsize = round_page(sizeof(struct pglist) * bucketcount);

	/*
	 * allocate the new buckets
	 */

	newbuckets = (struct pglist *) uvm_km_alloc(kernel_map, newsize);
	if (newbuckets == NULL) {
		printf("uvm_page_physrehash: WARNING: could not grow page "
		    "hash table\n");
		return;
	}
	for (lcv = 0 ; lcv < bucketcount ; lcv++)
		TAILQ_INIT(&newbuckets[lcv]);

	/*
	 * now replace the old buckets with the new ones and rehash everything
	 */

	s = splimp();
	simple_lock(&uvm.hashlock);
	uvm.page_hash = newbuckets;
	uvm.page_nhash = bucketcount;
	uvm.page_hashmask = bucketcount - 1;	/* power of 2 */

	/* ... and rehash */
	for (lcv = 0 ; lcv < oldcount ; lcv++) {
		while ((pg = oldbuckets[lcv].tqh_first) != NULL) {
			TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq);
			TAILQ_INSERT_TAIL(
			    &uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)],
			    pg, hashq);
		}
	}
	simple_unlock(&uvm.hashlock);
	splx(s);

	/*
	 * free the old bucket array if it is not the boot-time table
	 */

	if (oldbuckets != &uvm_bootbucket)
		uvm_km_free(kernel_map, (vaddr_t) oldbuckets, oldsize);

	/*
	 * done
	 */
	return;
}


#if 1 /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */

void uvm_page_physdump __P((void)); /* SHUT UP GCC */

/* call from DDB */
void
uvm_page_physdump()
{
	int lcv;

	printf("rehash: physical memory config [segs=%d of %d]:\n",
	    vm_nphysseg, VM_PHYSSEG_MAX);
	for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
		printf("0x%lx->0x%lx [0x%lx->0x%lx]\n", vm_physmem[lcv].start,
		    vm_physmem[lcv].end, vm_physmem[lcv].avail_start,
		    vm_physmem[lcv].avail_end);
	printf("STRATEGY = ");
	switch (VM_PHYSSEG_STRAT) {
	case VM_PSTRAT_RANDOM: printf("RANDOM\n"); break;
	case VM_PSTRAT_BSEARCH: printf("BSEARCH\n"); break;
	case VM_PSTRAT_BIGFIRST: printf("BIGFIRST\n"); break;
	default: printf("<<UNKNOWN>>!!!!\n");
	}
	printf("number of buckets = %d\n", uvm.page_nhash);
}
#endif

/*
 * uvm_pagealloc_strat: allocate vm_page from a particular free list.
 *
 * => return null if no pages free
 * => wake up pagedaemon if number of free pages drops below low water mark
 * => if obj != NULL, obj must be locked (to put in hash)
 * => if anon != NULL, anon must be locked (to put in anon)
 * => only one of obj or anon can be non-null
 * => caller must activate/deactivate page if it is not wired.
 * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
 */
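
/*
 * usage sketch (assumes the uvm_pagealloc() wrapper from uvm_page.h;
 * the wait message and retry label are hypothetical):
 *
 *	pg = uvm_pagealloc(uobj, offset, NULL, 0);
 *	if (pg == NULL) {
 *		uvm_wait("pgalloc");
 *		goto retry;
 *	}
 *
 * the page comes back PG_BUSY|PG_CLEAN|PG_FAKE and owned by the caller,
 * which must later clear PG_BUSY and activate or deactivate the page
 * itself.
 */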

struct vm_page *
uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
	struct uvm_object *obj;
	voff_t off;
	int flags;
	struct vm_anon *anon;
	int strat, free_list;
{
	int lcv, s;
	struct vm_page *pg;
	struct pglist *freeq;
	boolean_t use_reserve;

#ifdef DIAGNOSTIC
	/* sanity check */
	if (obj && anon)
		panic("uvm_pagealloc: obj and anon != NULL");
#endif

	s = uvm_lock_fpageq();		/* lock free page queue */

	/*
	 * check to see if we need to generate some free pages by waking
	 * the pagedaemon.
	 */

	if (uvmexp.free < uvmexp.freemin || (uvmexp.free < uvmexp.freetarg &&
	    uvmexp.inactive < uvmexp.inactarg))
		wakeup(&uvm.pagedaemon);

	/*
	 * fail if any of these conditions is true:
	 * [1]  there really are no free pages, or
	 * [2]  only kernel "reserved" pages remain and
	 *        the page isn't being allocated to a kernel object.
	 * [3]  only pagedaemon "reserved" pages remain and
	 *        the requestor isn't the pagedaemon.
	 */

	use_reserve = (flags & UVM_PGA_USERESERVE) ||
	    (obj && UVM_OBJ_IS_KERN_OBJECT(obj));
	if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) ||
	    (uvmexp.free <= uvmexp.reserve_pagedaemon &&
	     !(use_reserve && curproc == uvm.pagedaemon_proc)))
		goto fail;

 again:
	switch (strat) {
	case UVM_PGA_STRAT_NORMAL:
		/* Check all freelists in descending priority order. */
		for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
			freeq = &uvm.page_free[lcv];
			if ((pg = freeq->tqh_first) != NULL)
				goto gotit;
		}

		/* No pages free! */
		goto fail;

	case UVM_PGA_STRAT_ONLY:
	case UVM_PGA_STRAT_FALLBACK:
		/* Attempt to allocate from the specified free list. */
#ifdef DIAGNOSTIC
		if (free_list >= VM_NFREELIST || free_list < 0)
			panic("uvm_pagealloc_strat: bad free list %d",
			    free_list);
#endif
		freeq = &uvm.page_free[free_list];
		if ((pg = freeq->tqh_first) != NULL)
			goto gotit;

		/* Fall back, if possible. */
		if (strat == UVM_PGA_STRAT_FALLBACK) {
			strat = UVM_PGA_STRAT_NORMAL;
			goto again;
		}

		/* No pages free! */
		goto fail;

	default:
		panic("uvm_pagealloc_strat: bad strat %d", strat);
		/* NOTREACHED */
	}

 gotit:
	TAILQ_REMOVE(freeq, pg, pageq);
	uvmexp.free--;

	uvm_unlock_fpageq(s);		/* unlock free page queue */

	pg->offset = off;
	pg->uobject = obj;
	pg->uanon = anon;
	pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
	pg->version++;
	pg->wire_count = 0;
	pg->loan_count = 0;
	if (anon) {
		anon->u.an_page = pg;
		pg->pqflags = PQ_ANON;
	} else {
		if (obj)
			uvm_pageinsert(pg);
		pg->pqflags = 0;
	}
#if defined(UVM_PAGE_TRKOWN)
	pg->owner_tag = NULL;
#endif
	UVM_PAGE_OWN(pg, "new alloc");

	return(pg);

 fail:
	uvm_unlock_fpageq(s);
	return (NULL);
}

/*
 * uvm_pagerealloc: reallocate a page from one object to another
 *
 * => both objects must be locked
 */

void
uvm_pagerealloc(pg, newobj, newoff)
	struct vm_page *pg;
	struct uvm_object *newobj;
	voff_t newoff;
{
	/*
	 * remove it from the old object
	 */

	if (pg->uobject) {
		uvm_pageremove(pg);
	}

	/*
	 * put it in the new object
	 */

	if (newobj) {
		pg->uobject = newobj;
		pg->offset = newoff;
		pg->version++;
		uvm_pageinsert(pg);
	}

	return;
}


/*
 * uvm_pagefree: free page
 *
 * => erase page's identity (i.e. remove from hash/object)
 * => put page on free list
 * => caller must lock owning object (either anon or uvm_object)
 * => caller must lock page queues
 * => assumes all valid mappings of pg are gone
 */
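
/*
 * usage sketch (assumes the uvm_lock_pageq()/uvm_unlock_pageq() macros
 * from uvm_page.h):
 *
 *	uvm_lock_pageq();
 *	uvm_pagefree(pg);
 *	uvm_unlock_pageq();
 *
 * the owning object or anon must also be locked by the caller, and all
 * pmap mappings of the page must already be gone.
 */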

void
uvm_pagefree(pg)
	struct vm_page *pg;
{
	int s;
	int saved_loan_count = pg->loan_count;

	/*
	 * if the page was an object page (and thus "TABLED"), remove it
	 * from the object.
	 */

	if (pg->flags & PG_TABLED) {

		/*
		 * if the object page is on loan we are going to drop ownership.
		 * it is possible that an anon will take over as owner for this
		 * page later on.  the anon will want a !PG_CLEAN page so that
		 * it knows it needs to allocate swap if it wants to page the
		 * page out.
		 */

		if (saved_loan_count)
			pg->flags &= ~PG_CLEAN; /* in case an anon takes over */

		uvm_pageremove(pg);

		/*
		 * if our page was on loan, then we just lost control over it
		 * (in fact, if it was loaned to an anon, the anon may have
		 * already taken over ownership of the page by now and thus
		 * changed the loan_count [e.g. in uvmfault_anonget()]) we just
		 * return (when the last loan is dropped, then the page can be
		 * freed by whatever was holding the last loan).
		 */
		if (saved_loan_count)
			return;

	} else if (saved_loan_count && (pg->pqflags & PQ_ANON)) {

		/*
		 * if our page is owned by an anon and is loaned out to the
		 * kernel then we just want to drop ownership and return.
		 * the kernel must free the page when all its loans clear ...
		 * note that the kernel can't change the loan status of our
		 * page as long as we are holding PQ lock.
		 */
		pg->pqflags &= ~PQ_ANON;
		pg->uanon = NULL;
		return;
	}

#ifdef DIAGNOSTIC
	if (saved_loan_count) {
		printf("uvm_pagefree: warning: freeing page with a loan "
		    "count of %d\n", saved_loan_count);
		panic("uvm_pagefree: loan count");
	}
#endif


	/*
	 * now remove the page from the queues
	 */

	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&uvm.page_active, pg, pageq);
		pg->pqflags &= ~PQ_ACTIVE;
		uvmexp.active--;
	}
	if (pg->pqflags & PQ_INACTIVE) {
		if (pg->pqflags & PQ_SWAPBACKED)
			TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq);
		else
			TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq);
		pg->pqflags &= ~PQ_INACTIVE;
		uvmexp.inactive--;
	}

	/*
	 * if the page was wired, unwire it now.
	 */
	if (pg->wire_count) {
		pg->wire_count = 0;
		uvmexp.wired--;
	}

	/*
	 * and put on free queue
	 */

	s = uvm_lock_fpageq();
	TAILQ_INSERT_TAIL(&uvm.page_free[uvm_page_lookup_freelist(pg)],
	    pg, pageq);
	pg->pqflags = PQ_FREE;
#ifdef DEBUG
	pg->uobject = (void *)0xdeadbeef;
	pg->offset = 0xdeadbeef;
	pg->uanon = (void *)0xdeadbeef;
#endif
	uvmexp.free++;
	uvm_unlock_fpageq(s);
}

#if defined(UVM_PAGE_TRKOWN)
/*
 * uvm_page_own: set or release page ownership
 *
 * => this is a debugging function that keeps track of who sets PG_BUSY
 *    and where they do it.   it can be used to track down problems
 *    such as a process setting "PG_BUSY" and never releasing it.
 * => page's object [if any] must be locked
 * => if "tag" is NULL then we are releasing page ownership
 */
void
uvm_page_own(pg, tag)
	struct vm_page *pg;
	char *tag;
{
	/* gain ownership? */
	if (tag) {
		if (pg->owner_tag) {
			printf("uvm_page_own: page %p already owned "
			    "by proc %d [%s]\n", pg,
			    pg->owner, pg->owner_tag);
			panic("uvm_page_own");
		}
		pg->owner = (curproc) ? curproc->p_pid : (pid_t) -1;
		pg->owner_tag = tag;
		return;
	}

	/* drop ownership */
	if (pg->owner_tag == NULL) {
		printf("uvm_page_own: dropping ownership of a non-owned "
1153 "page (%p)\n", pg);
1154 panic("uvm_page_own");
1155 }
1156 pg->owner_tag = NULL;
1157 return;
1158 }
1159 #endif
1160