/*	$NetBSD: vm.c,v 1.162 2015/04/03 16:40:55 pooka Exp $	*/

/*
 * Copyright (c) 2007-2011 Antti Kantee.  All Rights Reserved.
 *
 * Development of this software was supported by
 * The Finnish Cultural Foundation and the Research Foundation of
 * The Helsinki University of Technology.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Virtual memory emulation routines.
 */

/*
 * XXX: we abuse pg->uanon for the virtual address of the storage
 * for each page.  phys_addr would fit the job description better,
 * except that it will create unnecessary lossage on some platforms
 * due to not being a pointer type.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.162 2015/04/03 16:40:55 pooka Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/vmem.h>
#include <sys/mman.h>
#include <sys/null.h>
#include <sys/vnode.h>

#include <machine/pmap.h>

#include <rump/rumpuser.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_prot.h>
#include <uvm/uvm_readahead.h>
#include <uvm/uvm_device.h>

#include "rump_private.h"
#include "rump_vfs_private.h"

kmutex_t uvm_pageqlock;		/* non-free page lock */
kmutex_t uvm_fpageqlock;	/* free page lock, non-gpl license */
kmutex_t uvm_swap_data_lock;

struct uvmexp uvmexp;
struct uvm uvm;

#ifdef __uvmexp_pagesize
const int * const uvmexp_pagesize = &uvmexp.pagesize;
const int * const uvmexp_pagemask = &uvmexp.pagemask;
const int * const uvmexp_pageshift = &uvmexp.pageshift;
#endif

struct vm_map rump_vmmap;

static struct vm_map kernel_map_store;
struct vm_map *kernel_map = &kernel_map_store;

static struct vm_map module_map_store;
extern struct vm_map *module_map;

vmem_t *kmem_arena;
vmem_t *kmem_va_arena;

static unsigned int pdaemon_waiters;
static kmutex_t pdaemonmtx;
static kcondvar_t pdaemoncv, oomwait;

/* all local non-proc0 processes share this vmspace */
struct vmspace *rump_vmspace_local;

unsigned long rump_physmemlimit = RUMPMEM_UNLIMITED;
static unsigned long pdlimit = RUMPMEM_UNLIMITED; /* page daemon memlimit */
static unsigned long curphysmem;
static unsigned long dddlim;		/* 90% of memory limit used */
#define NEED_PAGEDAEMON() \
        (rump_physmemlimit != RUMPMEM_UNLIMITED && curphysmem > dddlim)
#define PDRESERVE (2*MAXPHYS)

/*
 * Try to free two pages' worth of pages from objects.
 * If this successfully frees a full page cache page, we'll
 * free the released page plus PAGE_SIZE/sizeof(vm_page).
 */
#define PAGEDAEMON_OBJCHUNK (2*PAGE_SIZE / sizeof(struct vm_page))

/*
 * Keep a list of least recently used pages.  Since the only way a
 * rump kernel can "access" a page is via lookup, we put the page
 * at the back of the queue every time a lookup for it is done.  If the
 * page is at the front of this global queue and we're short of memory,
 * it's a candidate for pageout.
 */
static struct pglist vmpage_lruqueue;
static unsigned vmpage_onqueue;

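/*
 * Pages belonging to a uvm_object are kept in a per-object red-black
 * tree keyed by their offset within the object.  The comparators below
 * define that ordering; uvm_pagelookup() queries the tree via
 * rb_tree_find_node().
 */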
static int
pg_compare_key(void *ctx, const void *n, const void *key)
{
        voff_t a = ((const struct vm_page *)n)->offset;
        voff_t b = *(const voff_t *)key;

        if (a < b)
                return -1;
        else if (a > b)
                return 1;
        else
                return 0;
}

static int
pg_compare_nodes(void *ctx, const void *n1, const void *n2)
{

        return pg_compare_key(ctx, n1, &((const struct vm_page *)n2)->offset);
}

const rb_tree_ops_t uvm_page_tree_ops = {
        .rbto_compare_nodes = pg_compare_nodes,
        .rbto_compare_key = pg_compare_key,
        .rbto_node_offset = offsetof(struct vm_page, rb_node),
        .rbto_context = NULL
};

/*
 * vm pages
 */

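/*
 * Pool cache constructor/destructor for struct vm_page.  The
 * constructor allocates a page of backing storage from the hypervisor
 * and stashes its address in pg->uanon (see the XXX comment at the
 * top of this file).
 */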
static int
pgctor(void *arg, void *obj, int flags)
{
        struct vm_page *pg = obj;

        memset(pg, 0, sizeof(*pg));
        pg->uanon = rump_hypermalloc(PAGE_SIZE, PAGE_SIZE,
            (flags & PR_WAITOK) == PR_WAITOK, "pgalloc");
        return pg->uanon == NULL;
}

static void
pgdtor(void *arg, void *obj)
{
        struct vm_page *pg = obj;

        rump_hyperfree(pg->uanon, PAGE_SIZE);
}

static struct pool_cache pagecache;

/*
 * Called with the object locked.  We don't support anons.
 */
struct vm_page *
uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon,
        int flags, int strat, int free_list)
{
        struct vm_page *pg;

        KASSERT(uobj && mutex_owned(uobj->vmobjlock));
        KASSERT(anon == NULL);

        pg = pool_cache_get(&pagecache, PR_NOWAIT);
        if (__predict_false(pg == NULL)) {
                return NULL;
        }

        pg->offset = off;
        pg->uobject = uobj;

        pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;
        if (flags & UVM_PGA_ZERO) {
                uvm_pagezero(pg);
        }

        TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
        (void)rb_tree_insert_node(&uobj->rb_tree, pg);

        /*
         * Don't put anons on the LRU page queue.  We can't flush them
         * (there's no concept of swap in a rump kernel), so no reason
         * to bother with them.
         */
        if (!UVM_OBJ_IS_AOBJ(uobj)) {
                atomic_inc_uint(&vmpage_onqueue);
                mutex_enter(&uvm_pageqlock);
                TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
                mutex_exit(&uvm_pageqlock);
        }

        uobj->uo_npages++;

        return pg;
}

/*
 * Release a page.
 *
 * Called with the vm object locked.
 */
void
uvm_pagefree(struct vm_page *pg)
{
        struct uvm_object *uobj = pg->uobject;

        KASSERT(mutex_owned(&uvm_pageqlock));
        KASSERT(mutex_owned(uobj->vmobjlock));

        if (pg->flags & PG_WANTED)
                wakeup(pg);

        TAILQ_REMOVE(&uobj->memq, pg, listq.queue);

        uobj->uo_npages--;
        rb_tree_remove_node(&uobj->rb_tree, pg);

        if (!UVM_OBJ_IS_AOBJ(uobj)) {
                TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
                atomic_dec_uint(&vmpage_onqueue);
        }

        pool_cache_put(&pagecache, pg);
}

void
uvm_pagezero(struct vm_page *pg)
{

        pg->flags &= ~PG_CLEAN;
        memset((void *)pg->uanon, 0, PAGE_SIZE);
}

/*
 * uvm_page_locked_p: return true if object associated with page is
 * locked.  this is a weak check for runtime assertions only.
 */

bool
uvm_page_locked_p(struct vm_page *pg)
{

        return mutex_owned(pg->uobject->vmobjlock);
}

/*
 * Misc routines
 */

static kmutex_t pagermtx;

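/*
 * Bootstrap rump kernel VM.  If the RUMP_MEMLIMIT environment variable
 * is set, it caps how much memory we request from the hypervisor;
 * e.g. RUMP_MEMLIMIT=16m means a 16MiB limit (suffixes k/m/g are
 * accepted, as parsed below).  Otherwise we rely on the host's limits.
 */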
void
uvm_init(void)
{
        char buf[64];

        if (rumpuser_getparam("RUMP_MEMLIMIT", buf, sizeof(buf)) == 0) {
                unsigned long tmp;
                char *ep;
                int mult;

                tmp = strtoul(buf, &ep, 10);
                if (strlen(ep) > 1)
                        panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);

                /* mini-dehumanize-number */
                mult = 1;
                switch (*ep) {
                case 'k':
                        mult = 1024;
                        break;
                case 'm':
                        mult = 1024*1024;
                        break;
                case 'g':
                        mult = 1024*1024*1024;
                        break;
                case 0:
                        break;
                default:
                        panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);
                }
                rump_physmemlimit = tmp * mult;

                if (rump_physmemlimit / mult != tmp)
                        panic("uvm_init: RUMP_MEMLIMIT overflow: %s", buf);

                /* reserve some memory for the pager */
                if (rump_physmemlimit <= PDRESERVE)
                        panic("uvm_init: system reserves %d bytes of mem, "
                            "only %lu bytes given",
                            PDRESERVE, rump_physmemlimit);
                pdlimit = rump_physmemlimit;
                rump_physmemlimit -= PDRESERVE;

                if (pdlimit < 1024*1024)
                        printf("uvm_init: WARNING: <1MB RAM limit, "
                            "hope you know what you're doing\n");

#define HUMANIZE_BYTES 9
                CTASSERT(sizeof(buf) >= HUMANIZE_BYTES);
                format_bytes(buf, HUMANIZE_BYTES, rump_physmemlimit);
#undef HUMANIZE_BYTES
                dddlim = 9 * (rump_physmemlimit / 10);
        } else {
                strlcpy(buf, "unlimited (host limit)", sizeof(buf));
        }
        aprint_verbose("total memory = %s\n", buf);

        TAILQ_INIT(&vmpage_lruqueue);

        if (rump_physmemlimit == RUMPMEM_UNLIMITED) {
                uvmexp.npages = physmem;
        } else {
                uvmexp.npages = pdlimit >> PAGE_SHIFT;
                uvmexp.reserve_pagedaemon = PDRESERVE >> PAGE_SHIFT;
                uvmexp.freetarg = (rump_physmemlimit-dddlim) >> PAGE_SHIFT;
        }
        /*
         * uvmexp.free is not used internally or updated.  The reason is
         * that the memory hypercall allocator is allowed to allocate
         * non-page sized chunks.  We use a byte count in curphysmem
         * instead.
         */
        uvmexp.free = uvmexp.npages;

#ifndef __uvmexp_pagesize
        uvmexp.pagesize = PAGE_SIZE;
        uvmexp.pagemask = PAGE_MASK;
        uvmexp.pageshift = PAGE_SHIFT;
#else
#define FAKE_PAGE_SHIFT 12
        uvmexp.pageshift = FAKE_PAGE_SHIFT;
        uvmexp.pagesize = 1<<FAKE_PAGE_SHIFT;
        uvmexp.pagemask = (1<<FAKE_PAGE_SHIFT)-1;
#undef FAKE_PAGE_SHIFT
#endif

        mutex_init(&pagermtx, MUTEX_DEFAULT, IPL_NONE);
        mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, IPL_NONE);
        mutex_init(&uvm_swap_data_lock, MUTEX_DEFAULT, IPL_NONE);

        /* just to appease linkage */
        mutex_init(&uvm_fpageqlock, MUTEX_SPIN, IPL_VM);

        mutex_init(&pdaemonmtx, MUTEX_DEFAULT, IPL_NONE);
        cv_init(&pdaemoncv, "pdaemon");
        cv_init(&oomwait, "oomwait");

        module_map = &module_map_store;

        kernel_map->pmap = pmap_kernel();

        pool_subsystem_init();

        kmem_arena = vmem_create("kmem", 0, 1024*1024, PAGE_SIZE,
            NULL, NULL, NULL,
            0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);

        vmem_subsystem_init(kmem_arena);

        kmem_va_arena = vmem_create("kva", 0, 0, PAGE_SIZE,
            vmem_alloc, vmem_free, kmem_arena,
            8 * PAGE_SIZE, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);

        pool_cache_bootstrap(&pagecache, sizeof(struct vm_page), 0, 0, 0,
            "page$", NULL, IPL_NONE, pgctor, pgdtor, NULL);

        /* create vmspace used by local clients */
        rump_vmspace_local = kmem_zalloc(sizeof(*rump_vmspace_local), KM_SLEEP);
        uvmspace_init(rump_vmspace_local, (struct pmap *)-2, 0, 0, false);
}

void
uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax,
        bool topdown)
{

        vm->vm_map.pmap = pmap;
        vm->vm_refcnt = 1;
}

void
uvm_pagewire(struct vm_page *pg)
{

        /* nada */
}

void
uvm_pageunwire(struct vm_page *pg)
{

        /* nada */
}

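/*
 * Set up initial resource limits for a process: the stack gets the
 * usual default soft limit, everything else is effectively unlimited.
 */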
/* where's your schmonz now? */
#define PUNLIMIT(a)	\
    p->p_rlimit[a].rlim_cur = p->p_rlimit[a].rlim_max = RLIM_INFINITY;
void
uvm_init_limits(struct proc *p)
{

#ifndef DFLSSIZ
#define DFLSSIZ (16*1024*1024)
#endif
        p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
        p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
        PUNLIMIT(RLIMIT_DATA);
        PUNLIMIT(RLIMIT_RSS);
        PUNLIMIT(RLIMIT_AS);
        /* nice, cascade */
}
#undef PUNLIMIT

/*
 * This satisfies the "disgusting mmap hack" used by proplib.
 */
int
uvm_mmap_anon(struct proc *p, void **addrp, size_t size)
{
        int error;

        /* no reason in particular, but cf. uvm_default_mapaddr() */
        if (*addrp != NULL)
                panic("uvm_mmap() variant unsupported");

        if (RUMP_LOCALPROC_P(curproc)) {
                error = rumpuser_anonmmap(NULL, size, 0, 0, addrp);
        } else {
                error = rump_sysproxy_anonmmap(p->p_vmspace->vm_map.pmap,
                    size, addrp);
        }
        return error;
}

/*
 * Stubs for things referenced from vfs_vnode.c but not used.
 */
const dev_t zerodev;

struct uvm_object *
udv_attach(dev_t device, vm_prot_t accessprot, voff_t off, vsize_t size)
{
        return NULL;
}

struct pagerinfo {
        vaddr_t pgr_kva;
        int pgr_npages;
        struct vm_page **pgr_pgs;
        bool pgr_read;

        LIST_ENTRY(pagerinfo) pgr_entries;
};
static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist);

/*
 * Pager "map" in routine.  Instead of mapping, we allocate memory
 * and copy page contents there.  The reason for copying instead of
 * mapping is simple: we do not assume we are running on virtual
 * memory.  Even if we could emulate virtual memory in some envs
 * such as userspace, copying is much faster than trying to awkwardly
 * cope with remapping (see "Design and Implementation" pp. 95-98).
 * The downside of the approach is that the pager requires MAXPHYS
 * free memory to perform paging, but short of virtual memory or
 * making the pager do I/O in page-sized chunks we cannot do much
 * about that.
 */
vaddr_t
uvm_pagermapin(struct vm_page **pgs, int npages, int flags)
{
        struct pagerinfo *pgri;
        vaddr_t curkva;
        int i;

        /* allocate structures */
        pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP);
        pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP);
        pgri->pgr_npages = npages;
        pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP);
        pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0;

        /* copy contents to "mapped" memory */
        for (i = 0, curkva = pgri->pgr_kva;
            i < npages;
            i++, curkva += PAGE_SIZE) {
                /*
                 * We need to copy the previous contents of the pages to
                 * the window even if we are reading from the
                 * device, since the device might not fill the contents of
                 * the full mapped range and we would end up corrupting
                 * data when we unmap the window.
                 */
                memcpy((void *)curkva, pgs[i]->uanon, PAGE_SIZE);
                pgri->pgr_pgs[i] = pgs[i];
        }

        mutex_enter(&pagermtx);
        LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries);
        mutex_exit(&pagermtx);

        return pgri->pgr_kva;
}

/*
 * map out the pager window.  return contents from VA to page storage
 * and free structures.
 *
 * Note: does not currently support partial frees
 */
void
uvm_pagermapout(vaddr_t kva, int npages)
{
        struct pagerinfo *pgri;
        vaddr_t curkva;
        int i;

        mutex_enter(&pagermtx);
        LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
                if (pgri->pgr_kva == kva)
                        break;
        }
        KASSERT(pgri);
        if (pgri->pgr_npages != npages)
                panic("uvm_pagermapout: partial unmapping not supported");
        LIST_REMOVE(pgri, pgr_entries);
        mutex_exit(&pagermtx);

        if (pgri->pgr_read) {
                for (i = 0, curkva = pgri->pgr_kva;
                    i < pgri->pgr_npages;
                    i++, curkva += PAGE_SIZE) {
                        memcpy(pgri->pgr_pgs[i]->uanon,
                            (void *)curkva, PAGE_SIZE);
                }
        }

        kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *));
        kmem_free((void *)pgri->pgr_kva, npages * PAGE_SIZE);
        kmem_free(pgri, sizeof(*pgri));
}

/*
 * convert va in pager window to page structure.
 * XXX: how expensive is this (global lock, list traversal)?
 */
struct vm_page *
uvm_pageratop(vaddr_t va)
{
        struct pagerinfo *pgri;
        struct vm_page *pg = NULL;
        int i;

        mutex_enter(&pagermtx);
        LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
                if (pgri->pgr_kva <= va
                    && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE)
                        break;
        }
        if (pgri) {
                i = (va - pgri->pgr_kva) >> PAGE_SHIFT;
                pg = pgri->pgr_pgs[i];
        }
        mutex_exit(&pagermtx);

        return pg;
}

/*
 * Called with the vm object locked.
 *
 * Put vnode object pages at the end of the access queue to indicate
 * they have been recently accessed and should not be immediate
 * candidates for pageout.  Do not do this for lookups done by
 * the pagedaemon to mimic pmap_kentered mappings which don't track
 * access information.
 */
struct vm_page *
uvm_pagelookup(struct uvm_object *uobj, voff_t off)
{
        struct vm_page *pg;
        bool ispagedaemon = curlwp == uvm.pagedaemon_lwp;

        pg = rb_tree_find_node(&uobj->rb_tree, &off);
        if (pg && !UVM_OBJ_IS_AOBJ(pg->uobject) && !ispagedaemon) {
                mutex_enter(&uvm_pageqlock);
                TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
                TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
                mutex_exit(&uvm_pageqlock);
        }

        return pg;
}

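/*
 * Unbusy a set of pages: wake up anyone waiting on them and either
 * free pages flagged PG_RELEASED or clear PG_BUSY/PG_WANTED.
 * Called with the owning object locked.
 */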
void
uvm_page_unbusy(struct vm_page **pgs, int npgs)
{
        struct vm_page *pg;
        int i;

        KASSERT(npgs > 0);
        KASSERT(mutex_owned(pgs[0]->uobject->vmobjlock));

        for (i = 0; i < npgs; i++) {
                pg = pgs[i];
                if (pg == NULL)
                        continue;

                KASSERT(pg->flags & PG_BUSY);
                if (pg->flags & PG_WANTED)
                        wakeup(pg);
                if (pg->flags & PG_RELEASED)
                        uvm_pagefree(pg);
                else
                        pg->flags &= ~(PG_WANTED|PG_BUSY);
        }
}

void
uvm_estimatepageable(int *active, int *inactive)
{

        /* XXX: guessing game */
        *active = 1024;
        *inactive = 1024;
}

bool
vm_map_starved_p(struct vm_map *map)
{

        if (map->flags & VM_MAP_WANTVA)
                return true;

        return false;
}

int
uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
{

        panic("%s: unimplemented", __func__);
}

void
uvm_unloan(void *v, int npages, int flags)
{

        panic("%s: unimplemented", __func__);
}

int
uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
        struct vm_page **opp)
{

        return EBUSY;
}

struct vm_page *
uvm_loanbreak(struct vm_page *pg)
{

        panic("%s: unimplemented", __func__);
}

void
ubc_purge(struct uvm_object *uobj)
{

}

vaddr_t
uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz)
{

        return 0;
}

int
uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
        vm_prot_t prot, bool set_max)
{

        return EOPNOTSUPP;
}

/*
 * UVM km
 */

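/*
 * Allocate "kernel" memory.  Allocations from module_map are backed
 * by anonymous mmap (possibly executable and, on amd64, placed in the
 * low 2GB; see below); everything else comes straight from the
 * hypervisor allocator.
 */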
vaddr_t
uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
{
        void *rv, *desired = NULL;
        int alignbit, error;

#ifdef __x86_64__
        /*
         * On amd64, allocate all module memory from the lowest 2GB.
         * This is because NetBSD kernel modules are compiled
         * with -mcmodel=kernel and reserve only 4 bytes for
         * offsets.  If we load code compiled with -mcmodel=kernel
         * anywhere except the lowest or highest 2GB, it will not
         * work.  Since userspace does not have access to the highest
         * 2GB, use the lowest 2GB.
         *
         * Note: this assumes the rump kernel resides in
         * the lowest 2GB as well.
         *
         * Note2: yes, it's a quick hack, but since this is the only
         * place where we care about the map we're allocating from,
         * just use a simple "if" instead of coming up with a fancy
         * generic solution.
         */
        if (map == module_map) {
                desired = (void *)(0x80000000 - size);
        }
#endif

        if (__predict_false(map == module_map)) {
                alignbit = 0;
                if (align) {
                        alignbit = ffs(align)-1;
                }
                error = rumpuser_anonmmap(desired, size, alignbit,
                    flags & UVM_KMF_EXEC, &rv);
        } else {
                error = rumpuser_malloc(size, align, &rv);
        }

        if (error) {
                if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT))
                        return 0;
                else
                        panic("uvm_km_alloc failed");
        }

        if (flags & UVM_KMF_ZERO)
                memset(rv, 0, size);

        return (vaddr_t)rv;
}

void
uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
{

        if (__predict_false(map == module_map))
                rumpuser_unmap((void *)vaddr, size);
        else
                rumpuser_free((void *)vaddr, size);
}

struct vm_map *
uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
        vsize_t size, int pageable, bool fixed, struct vm_map *submap)
{

        return (struct vm_map *)417416;
}

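/*
 * Backend allocator for kmem_va_arena: since there is no real kernel
 * virtual address space to manage, simply hand out page-aligned
 * hypervisor memory and free it back directly.
 */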
int
uvm_km_kmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags,
        vmem_addr_t *addr)
{
        vaddr_t va;
        va = (vaddr_t)rump_hypermalloc(size, PAGE_SIZE,
            (flags & VM_SLEEP), "kmalloc");

        if (va) {
                *addr = va;
                return 0;
        } else {
                return ENOMEM;
        }
}

void
uvm_km_kmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{

        rump_hyperfree((void *)addr, size);
}

/*
 * VM space locking routines.  We don't really have to do anything,
 * since the pages are always "wired" (both local and remote processes).
 */
int
uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access)
{

        return 0;
}

void
uvm_vsunlock(struct vmspace *vs, void *addr, size_t len)
{

}

/*
 * For the local case the buffer mappers don't need to do anything.
 * For the remote case we need to reserve space and copy data in or
 * out, depending on B_READ/B_WRITE.
 */
int
vmapbuf(struct buf *bp, vsize_t len)
{
        int error = 0;

        bp->b_saveaddr = bp->b_data;

        /* remote case */
        if (!RUMP_LOCALPROC_P(curproc)) {
                bp->b_data = rump_hypermalloc(len, 0, true, "vmapbuf");
                if (BUF_ISWRITE(bp)) {
                        error = copyin(bp->b_saveaddr, bp->b_data, len);
                        if (error) {
                                rump_hyperfree(bp->b_data, len);
                                bp->b_data = bp->b_saveaddr;
                                bp->b_saveaddr = 0;
                        }
                }
        }

        return error;
}

void
vunmapbuf(struct buf *bp, vsize_t len)
{

        /* remote case */
        if (!RUMP_LOCALPROC_P(bp->b_proc)) {
                if (BUF_ISREAD(bp)) {
                        bp->b_error = copyout_proc(bp->b_proc,
                            bp->b_data, bp->b_saveaddr, len);
                }
                rump_hyperfree(bp->b_data, len);
        }

        bp->b_data = bp->b_saveaddr;
        bp->b_saveaddr = 0;
}

void
uvmspace_addref(struct vmspace *vm)
{

        /*
         * No dynamically allocated vmspaces exist.
         */
}

void
uvmspace_free(struct vmspace *vm)
{

        /* nothing for now */
}

/*
 * page life cycle stuff.  it really doesn't exist, so just stubs.
 */

void
uvm_pageactivate(struct vm_page *pg)
{

        /* nada */
}

void
uvm_pagedeactivate(struct vm_page *pg)
{

        /* nada */
}

void
uvm_pagedequeue(struct vm_page *pg)
{

        /* nada */
}

void
uvm_pageenqueue(struct vm_page *pg)
{

        /* nada */
}

void
uvmpdpol_anfree(struct vm_anon *an)
{

        /* nada */
}

/*
 * Physical address accessors.
 */

struct vm_page *
uvm_phys_to_vm_page(paddr_t pa)
{

        return NULL;
}

paddr_t
uvm_vm_page_to_phys(const struct vm_page *pg)
{

        return 0;
}

vaddr_t
uvm_uarea_alloc(void)
{

        /* non-zero */
        return (vaddr_t)11;
}

void
uvm_uarea_free(vaddr_t uarea)
{

        /* nata, so creamy */
}

/*
 * Routines related to the Page Baroness.
 */

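/*
 * Wait for the pagedaemon to (hopefully) release some memory.
 * The pagedaemon itself may only end up here while pageouts are
 * still in flight; otherwise we would deadlock on ourselves.
 */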
void
uvm_wait(const char *msg)
{

        if (__predict_false(rump_threads == 0))
                panic("pagedaemon missing (RUMP_THREADS = 0)");

        if (curlwp == uvm.pagedaemon_lwp) {
                /* is it possible for us to later get memory? */
                if (!uvmexp.paging)
                        panic("pagedaemon out of memory");
        }

        mutex_enter(&pdaemonmtx);
        pdaemon_waiters++;
        cv_signal(&pdaemoncv);
        cv_wait(&oomwait, &pdaemonmtx);
        mutex_exit(&pdaemonmtx);
}

void
uvm_pageout_start(int npages)
{

        mutex_enter(&pdaemonmtx);
        uvmexp.paging += npages;
        mutex_exit(&pdaemonmtx);
}

void
uvm_pageout_done(int npages)
{

        if (!npages)
                return;

        mutex_enter(&pdaemonmtx);
        KASSERT(uvmexp.paging >= npages);
        uvmexp.paging -= npages;

        if (pdaemon_waiters) {
                pdaemon_waiters = 0;
                cv_broadcast(&oomwait);
        }
        mutex_exit(&pdaemonmtx);
}

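/*
 * Try to clean and release a single page via its object's pgo_put
 * routine.  Called with uvm_pageqlock held; if the page could be
 * processed, the page queue lock is dropped and true is returned.
 * If the object lock is held by an lwp currently running on another
 * (virtual) CPU, *lockrunning is set so the caller knows that
 * yielding the host CPU may help.
 */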
static bool
processpage(struct vm_page *pg, bool *lockrunning)
{
        struct uvm_object *uobj;

        uobj = pg->uobject;
        if (mutex_tryenter(uobj->vmobjlock)) {
                if ((pg->flags & PG_BUSY) == 0) {
                        mutex_exit(&uvm_pageqlock);
                        uobj->pgops->pgo_put(uobj, pg->offset,
                            pg->offset + PAGE_SIZE,
                            PGO_CLEANIT|PGO_FREE);
                        KASSERT(!mutex_owned(uobj->vmobjlock));
                        return true;
                } else {
                        mutex_exit(uobj->vmobjlock);
                }
        } else if (*lockrunning == false && ncpu > 1) {
                CPU_INFO_ITERATOR cii;
                struct cpu_info *ci;
                struct lwp *l;

                l = mutex_owner(uobj->vmobjlock);
                for (CPU_INFO_FOREACH(cii, ci)) {
                        if (ci->ci_curlwp == l) {
                                *lockrunning = true;
                                break;
                        }
                }
        }

        return false;
}

/*
 * The Diabolical pageDaemon Director (DDD).
 *
 * This routine can always use better heuristics.
 */
void
uvm_pageout(void *arg)
{
        struct vm_page *pg;
        struct pool *pp, *pp_first;
        int cleaned, skip, skipped;
        bool succ;
        bool lockrunning;

        mutex_enter(&pdaemonmtx);
        for (;;) {
                if (!NEED_PAGEDAEMON()) {
                        kernel_map->flags &= ~VM_MAP_WANTVA;
                }

                if (pdaemon_waiters) {
                        pdaemon_waiters = 0;
                        cv_broadcast(&oomwait);
                }

                cv_wait(&pdaemoncv, &pdaemonmtx);
                uvmexp.pdwoke++;

                /* tell the world that we are hungry */
                kernel_map->flags |= VM_MAP_WANTVA;
                mutex_exit(&pdaemonmtx);

                /*
                 * step one: reclaim the page cache.  this should give
                 * us the biggest earnings since whole pages are released
                 * into backing memory.
                 */
                pool_cache_reclaim(&pagecache);
                if (!NEED_PAGEDAEMON()) {
                        mutex_enter(&pdaemonmtx);
                        continue;
                }

                /*
                 * Ok, so that didn't help.  Next, try to hunt memory
                 * by pushing out vnode pages.  The pages might contain
                 * useful cached data, but we need the memory.
                 */
                cleaned = 0;
                skip = 0;
                lockrunning = false;
 again:
                mutex_enter(&uvm_pageqlock);
                while (cleaned < PAGEDAEMON_OBJCHUNK) {
                        skipped = 0;
                        TAILQ_FOREACH(pg, &vmpage_lruqueue, pageq.queue) {

                                /*
                                 * skip over pages we _might_ have tried
                                 * to handle earlier.  they might not be
                                 * exactly the same ones, but I'm not too
                                 * concerned.
                                 */
                                while (skipped++ < skip)
                                        continue;

                                if (processpage(pg, &lockrunning)) {
                                        cleaned++;
                                        goto again;
                                }

                                skip++;
                        }
                        break;
                }
                mutex_exit(&uvm_pageqlock);

                /*
                 * Ok, someone is running with an object lock held.
                 * We want to yield the host CPU to make sure the
                 * thread is not parked on the host.  Since sched_yield()
                 * doesn't appear to do anything on NetBSD, nanosleep
                 * for the smallest possible time and hope we're back in
                 * the game soon.
                 */
                if (cleaned == 0 && lockrunning) {
                        rumpuser_clock_sleep(RUMPUSER_CLOCK_RELWALL, 0, 1);

                        lockrunning = false;
                        skip = 0;

                        /* and here we go again */
                        goto again;
                }

                /*
                 * And of course we need to reclaim the page cache
                 * again to actually release memory.
                 */
                pool_cache_reclaim(&pagecache);
                if (!NEED_PAGEDAEMON()) {
                        mutex_enter(&pdaemonmtx);
                        continue;
                }

                /*
                 * And then drain the pools.  Wipe them out ... all of them.
                 */
                for (pp_first = NULL;;) {
                        rump_vfs_drainbufs(10 /* XXX: estimate! */);

                        succ = pool_drain(&pp);
                        if (succ || pp == pp_first)
                                break;

                        if (pp_first == NULL)
                                pp_first = pp;
                }

                /*
                 * Need to use PYEC on our bag of tricks.
                 * Unfortunately, the wife just borrowed it.
                 */

                mutex_enter(&pdaemonmtx);
                if (!succ && cleaned == 0 && pdaemon_waiters &&
                    uvmexp.paging == 0) {
                        rumpuser_dprintf("pagedaemoness: failed to reclaim "
                            "memory ... sleeping (deadlock?)\n");
                        cv_timedwait(&pdaemoncv, &pdaemonmtx, hz);
                }
        }

        panic("you can swap out any time you like, but you can never leave");
}

void
uvm_kick_pdaemon()
{

        /*
         * Wake up the diabolical pagedaemon director if we are over
         * 90% of the memory limit.  This is a complete and utter
         * Stetson-Harrison decision which you are allowed to fine-tune.
         * Don't bother locking.  If we have some unflushed caches,
         * other waker-uppers will deal with the issue.
         */
        if (NEED_PAGEDAEMON()) {
                cv_signal(&pdaemoncv);
        }
}

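/*
 * Allocate memory from the hypervisor, accounting it against the
 * configured memory limit (the pagedaemon gets to dip into the
 * PDRESERVE slack via pdlimit).  If the limit is exceeded and the
 * caller can wait, sleep in uvm_wait() until the pagedaemon has made
 * progress and try again.
 */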
void *
rump_hypermalloc(size_t howmuch, int alignment, bool waitok, const char *wmsg)
{
        const unsigned long thelimit =
            curlwp == uvm.pagedaemon_lwp ? pdlimit : rump_physmemlimit;
        unsigned long newmem;
        void *rv;
        int error;

        uvm_kick_pdaemon(); /* ouch */

        /* first we must be within the limit */
 limitagain:
        if (thelimit != RUMPMEM_UNLIMITED) {
                newmem = atomic_add_long_nv(&curphysmem, howmuch);
                if (newmem > thelimit) {
                        newmem = atomic_add_long_nv(&curphysmem, -howmuch);
                        if (!waitok) {
                                return NULL;
                        }
                        uvm_wait(wmsg);
                        goto limitagain;
                }
        }

        /* second, we must get something from the backend */
 again:
        error = rumpuser_malloc(howmuch, alignment, &rv);
        if (__predict_false(error && waitok)) {
                uvm_wait(wmsg);
                goto again;
        }

        return rv;
}

void
rump_hyperfree(void *what, size_t size)
{

        if (rump_physmemlimit != RUMPMEM_UNLIMITED) {
                atomic_add_long(&curphysmem, -size);
        }
        rumpuser_free(what, size);
}