      1 /*	$NetBSD: vm.c,v 1.112 2011/02/22 18:43:20 pooka Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2007-2010 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Development of this software was supported by
      7  * The Finnish Cultural Foundation and the Research Foundation of
      8  * The Helsinki University of Technology.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     20  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     25  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Virtual memory emulation routines.
     34  */
     35 
     36 /*
     37  * XXX: we abuse pg->uanon for the virtual address of the storage
     38  * for each page.  phys_addr would fit the job description better,
     39  * except that it will create unnecessary lossage on some platforms
     40  * due to not being a pointer type.
     41  */
     42 
     43 #include <sys/cdefs.h>
     44 __KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.112 2011/02/22 18:43:20 pooka Exp $");
     45 
     46 #include <sys/param.h>
     47 #include <sys/atomic.h>
     48 #include <sys/buf.h>
     49 #include <sys/kernel.h>
     50 #include <sys/kmem.h>
     51 #include <sys/mman.h>
     52 #include <sys/null.h>
     53 #include <sys/vnode.h>
     54 
     55 #include <machine/pmap.h>
     56 
     57 #include <rump/rumpuser.h>
     58 
     59 #include <uvm/uvm.h>
     60 #include <uvm/uvm_ddb.h>
     61 #include <uvm/uvm_pdpolicy.h>
     62 #include <uvm/uvm_prot.h>
     63 #include <uvm/uvm_readahead.h>
     64 
     65 #include "rump_private.h"
     66 #include "rump_vfs_private.h"
     67 
     68 kmutex_t uvm_pageqlock;
     69 kmutex_t uvm_swap_data_lock;
     70 
     71 struct uvmexp uvmexp;
     72 struct uvm uvm;
     73 
     74 #ifdef __uvmexp_pagesize
     75 int *uvmexp_pagesize = &uvmexp.pagesize;
     76 int *uvmexp_pagemask = &uvmexp.pagemask;
     77 int *uvmexp_pageshift = &uvmexp.pageshift;
     78 #endif
     79 
     80 struct vm_map rump_vmmap;
     81 static struct vm_map_kernel kmem_map_store;
     82 struct vm_map *kmem_map = &kmem_map_store.vmk_map;
     83 
     84 static struct vm_map_kernel kernel_map_store;
     85 struct vm_map *kernel_map = &kernel_map_store.vmk_map;
     86 
     87 static unsigned int pdaemon_waiters;
     88 static kmutex_t pdaemonmtx;
     89 static kcondvar_t pdaemoncv, oomwait;
     90 
     91 unsigned long rump_physmemlimit = RUMPMEM_UNLIMITED;
     92 static unsigned long curphysmem;
     93 static unsigned long dddlim;		/* 90% of memory limit used */
     94 #define NEED_PAGEDAEMON() \
     95     (rump_physmemlimit != RUMPMEM_UNLIMITED && curphysmem > dddlim)
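         /*
          * Worked example (illustrative numbers, not taken from the code):
          * with RUMP_MEMLIMIT=16m, uvm_init() sets rump_physmemlimit to
          * 16*1024*1024 = 16777216 bytes and dddlim to
          * 9 * (16777216 / 10) = 15099489 bytes, so NEED_PAGEDAEMON()
          * starts returning true once rump_hypermalloc() pushes curphysmem
          * past roughly 90% of the configured limit.
          */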
     96 
     97 /*
      98  * Try to free two pages' worth of pages from objects.
      99  * If this successfully frees a full page cache page, we'll
     100  * free the released page plus PAGE_SIZE/sizeof(vm_page).
    101  */
    102 #define PAGEDAEMON_OBJCHUNK (2*PAGE_SIZE / sizeof(struct vm_page))
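         /*
          * Back-of-the-envelope figures (a sketch; sizeof(struct vm_page)
          * differs between platforms and kernel versions): with a 4096-byte
          * PAGE_SIZE and a vm_page of around 120 bytes, PAGEDAEMON_OBJCHUNK
          * works out to roughly 8192 / 120, i.e. about 68 pages per pass.
          */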
    103 
    104 /*
    105  * Keep a list of least recently used pages.  Since the only way a
    106  * rump kernel can "access" a page is via lookup, we put the page
     107  * at the back of the queue every time a lookup for it is done.  If the
     108  * page is at the front of this global queue and we're short of memory,
    109  * it's a candidate for pageout.
    110  */
    111 static struct pglist vmpage_lruqueue;
    112 static unsigned vmpage_onqueue;
    113 
    114 static int
    115 pg_compare_key(void *ctx, const void *n, const void *key)
    116 {
    117 	voff_t a = ((const struct vm_page *)n)->offset;
    118 	voff_t b = *(const voff_t *)key;
    119 
    120 	if (a < b)
    121 		return -1;
    122 	else if (a > b)
    123 		return 1;
    124 	else
    125 		return 0;
    126 }
    127 
    128 static int
    129 pg_compare_nodes(void *ctx, const void *n1, const void *n2)
    130 {
    131 
    132 	return pg_compare_key(ctx, n1, &((const struct vm_page *)n2)->offset);
    133 }
    134 
    135 const rb_tree_ops_t uvm_page_tree_ops = {
    136 	.rbto_compare_nodes = pg_compare_nodes,
    137 	.rbto_compare_key = pg_compare_key,
    138 	.rbto_node_offset = offsetof(struct vm_page, rb_node),
    139 	.rbto_context = NULL
    140 };
    141 
    142 /*
    143  * vm pages
    144  */
    145 
    146 static int
    147 pgctor(void *arg, void *obj, int flags)
    148 {
    149 	struct vm_page *pg = obj;
    150 
    151 	memset(pg, 0, sizeof(*pg));
    152 	pg->uanon = rump_hypermalloc(PAGE_SIZE, PAGE_SIZE,
    153 	    (flags & PR_WAITOK) == PR_WAITOK, "pgalloc");
    154 	return pg->uanon == NULL;
    155 }
    156 
    157 static void
    158 pgdtor(void *arg, void *obj)
    159 {
    160 	struct vm_page *pg = obj;
    161 
    162 	rump_hyperfree(pg->uanon, PAGE_SIZE);
    163 }
    164 
    165 static struct pool_cache pagecache;
    166 
    167 /*
    168  * Called with the object locked.  We don't support anons.
    169  */
    170 struct vm_page *
    171 uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon,
    172 	int flags, int strat, int free_list)
    173 {
    174 	struct vm_page *pg;
    175 
    176 	KASSERT(uobj && mutex_owned(&uobj->vmobjlock));
    177 	KASSERT(anon == NULL);
    178 
    179 	pg = pool_cache_get(&pagecache, PR_NOWAIT);
    180 	if (__predict_false(pg == NULL)) {
    181 		return NULL;
    182 	}
    183 
    184 	pg->offset = off;
    185 	pg->uobject = uobj;
    186 
    187 	pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;
    188 	if (flags & UVM_PGA_ZERO) {
    189 		uvm_pagezero(pg);
    190 	}
    191 
    192 	TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
    193 	(void)rb_tree_insert_node(&uobj->rb_tree, pg);
    194 
    195 	/*
     196 	 * Don't put pages belonging to anonymous objects (aobjs) on the
     197 	 * LRU page queue.  We can't page them out (there's no concept of
     198 	 * swap in a rump kernel), so there is no reason to bother with them.
    199 	 */
    200 	if (!UVM_OBJ_IS_AOBJ(uobj)) {
    201 		atomic_inc_uint(&vmpage_onqueue);
    202 		mutex_enter(&uvm_pageqlock);
    203 		TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
    204 		mutex_exit(&uvm_pageqlock);
    205 	}
    206 
    207 	uobj->uo_npages++;
    208 
    209 	return pg;
    210 }
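         /*
          * A typical caller pattern might look like the sketch below,
          * assuming the usual uvm_pagealloc() convenience macro (which
          * expands to uvm_pagealloc_strat() with default strategy
          * arguments); error handling is abbreviated:
          *
          *	mutex_enter(&uobj->vmobjlock);
          *	while ((pg = uvm_pagealloc(uobj, off, NULL,
          *	    UVM_PGA_ZERO)) == NULL) {
          *		mutex_exit(&uobj->vmobjlock);
          *		uvm_wait("pgalloc");
          *		mutex_enter(&uobj->vmobjlock);
          *	}
          *	... use pg, then clear PG_BUSY under the object lock ...
          *	mutex_exit(&uobj->vmobjlock);
          */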
    211 
    212 /*
    213  * Release a page.
    214  *
    215  * Called with the vm object locked.
    216  */
    217 void
    218 uvm_pagefree(struct vm_page *pg)
    219 {
    220 	struct uvm_object *uobj = pg->uobject;
    221 
    222 	KASSERT(mutex_owned(&uvm_pageqlock));
    223 	KASSERT(mutex_owned(&uobj->vmobjlock));
    224 
    225 	if (pg->flags & PG_WANTED)
    226 		wakeup(pg);
    227 
    228 	TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
    229 
    230 	uobj->uo_npages--;
    231 	rb_tree_remove_node(&uobj->rb_tree, pg);
    232 
    233 	if (!UVM_OBJ_IS_AOBJ(uobj)) {
    234 		TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
    235 		atomic_dec_uint(&vmpage_onqueue);
    236 	}
    237 
    238 	pool_cache_put(&pagecache, pg);
    239 }
    240 
    241 void
    242 uvm_pagezero(struct vm_page *pg)
    243 {
    244 
    245 	pg->flags &= ~PG_CLEAN;
    246 	memset((void *)pg->uanon, 0, PAGE_SIZE);
    247 }
    248 
    249 /*
    250  * Misc routines
    251  */
    252 
    253 static kmutex_t pagermtx;
    254 
    255 void
    256 uvm_init(void)
    257 {
    258 	char buf[64];
    259 	int error;
    260 
    261 	if (rumpuser_getenv("RUMP_MEMLIMIT", buf, sizeof(buf), &error) == 0) {
    262 		unsigned long tmp;
    263 		char *ep;
    264 		int mult;
    265 
    266 		tmp = strtoul(buf, &ep, 10);
    267 		if (strlen(ep) > 1)
    268 			panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);
    269 
    270 		/* mini-dehumanize-number */
    271 		mult = 1;
    272 		switch (*ep) {
    273 		case 'k':
    274 			mult = 1024;
    275 			break;
    276 		case 'm':
    277 			mult = 1024*1024;
    278 			break;
    279 		case 'g':
    280 			mult = 1024*1024*1024;
    281 			break;
    282 		case 0:
    283 			break;
    284 		default:
    285 			panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);
    286 		}
    287 		rump_physmemlimit = tmp * mult;
    288 
    289 		if (rump_physmemlimit / mult != tmp)
    290 			panic("uvm_init: RUMP_MEMLIMIT overflow: %s", buf);
    291 		/* it's not like we'd get far with, say, 1 byte, but ... */
    292 		if (rump_physmemlimit == 0)
    293 			panic("uvm_init: no memory");
    294 
    295 #define HUMANIZE_BYTES 9
    296 		CTASSERT(sizeof(buf) >= HUMANIZE_BYTES);
    297 		format_bytes(buf, HUMANIZE_BYTES, rump_physmemlimit);
    298 #undef HUMANIZE_BYTES
    299 		dddlim = 9 * (rump_physmemlimit / 10);
    300 	} else {
    301 		strlcpy(buf, "unlimited (host limit)", sizeof(buf));
    302 	}
    303 	aprint_verbose("total memory = %s\n", buf);
    304 
    305 	TAILQ_INIT(&vmpage_lruqueue);
    306 
    307 	uvmexp.free = 1024*1024; /* XXX: arbitrary & not updated */
    308 
    309 #ifndef __uvmexp_pagesize
    310 	uvmexp.pagesize = PAGE_SIZE;
    311 	uvmexp.pagemask = PAGE_MASK;
    312 	uvmexp.pageshift = PAGE_SHIFT;
    313 #else
    314 #define FAKE_PAGE_SHIFT 12
    315 	uvmexp.pageshift = FAKE_PAGE_SHIFT;
    316 	uvmexp.pagesize = 1<<FAKE_PAGE_SHIFT;
    317 	uvmexp.pagemask = (1<<FAKE_PAGE_SHIFT)-1;
    318 #undef FAKE_PAGE_SHIFT
    319 #endif
    320 
    321 	mutex_init(&pagermtx, MUTEX_DEFAULT, 0);
    322 	mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0);
    323 	mutex_init(&uvm_swap_data_lock, MUTEX_DEFAULT, 0);
    324 
    325 	mutex_init(&pdaemonmtx, MUTEX_DEFAULT, 0);
    326 	cv_init(&pdaemoncv, "pdaemon");
    327 	cv_init(&oomwait, "oomwait");
    328 
    329 	kernel_map->pmap = pmap_kernel();
    330 	callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM);
    331 	kmem_map->pmap = pmap_kernel();
    332 	callback_head_init(&kmem_map_store.vmk_reclaim_callback, IPL_VM);
    333 
    334 	pool_cache_bootstrap(&pagecache, sizeof(struct vm_page), 0, 0, 0,
    335 	    "page$", NULL, IPL_NONE, pgctor, pgdtor, NULL);
    336 }
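         /*
          * Example (hypothetical invocation): starting a rump kernel with
          * RUMP_MEMLIMIT=32m in the environment makes the parsing above set
          * rump_physmemlimit to 32 MiB.  Only a single optional 'k', 'm' or
          * 'g' suffix is accepted; any other trailing characters cause a
          * panic during init.
          */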
    337 
    338 void
    339 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax)
    340 {
    341 
    342 	vm->vm_map.pmap = pmap_kernel();
    343 	vm->vm_refcnt = 1;
    344 }
    345 
    346 void
    347 uvm_pagewire(struct vm_page *pg)
    348 {
    349 
    350 	/* nada */
    351 }
    352 
    353 void
    354 uvm_pageunwire(struct vm_page *pg)
    355 {
    356 
    357 	/* nada */
    358 }
    359 
    360 /*
    361  * The uvm reclaim hook is not currently necessary because it is
    362  * used only by ZFS and implements exactly the same functionality
     363  * as the kva reclaim hook, which we already run in the pagedaemon
     364  * (rump vm does not have a concept of uvm_map(), so we cannot
     365  * reclaim kva when a mapping operation fails due to insufficient
     366  * available kva).
    367  */
    368 void
    369 uvm_reclaim_hook_add(struct uvm_reclaim_hook *hook_entry)
    370 {
    371 
    372 }
    373 __strong_alias(uvm_reclaim_hook_del,uvm_reclaim_hook_add);
    374 
    375 /* where's your schmonz now? */
    376 #define PUNLIMIT(a)	\
    377 p->p_rlimit[a].rlim_cur = p->p_rlimit[a].rlim_max = RLIM_INFINITY;
    378 void
    379 uvm_init_limits(struct proc *p)
    380 {
    381 
    382 	PUNLIMIT(RLIMIT_STACK);
    383 	PUNLIMIT(RLIMIT_DATA);
    384 	PUNLIMIT(RLIMIT_RSS);
    385 	PUNLIMIT(RLIMIT_AS);
    386 	/* nice, cascade */
    387 }
    388 #undef PUNLIMIT
    389 
    390 /*
    391  * This satisfies the "disgusting mmap hack" used by proplib.
    392  * We probably should grow some more assertables to make sure we're
    393  * not satisfying anything we shouldn't be satisfying.
    394  */
    395 int
    396 uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    397 	vm_prot_t maxprot, int flags, void *handle, voff_t off, vsize_t locklim)
    398 {
    399 	void *uaddr;
    400 	int error;
    401 
    402 	if (prot != (VM_PROT_READ | VM_PROT_WRITE))
    403 		panic("uvm_mmap() variant unsupported");
    404 	if (flags != (MAP_PRIVATE | MAP_ANON))
    405 		panic("uvm_mmap() variant unsupported");
    406 
    407 	/* no reason in particular, but cf. uvm_default_mapaddr() */
    408 	if (*addr != 0)
    409 		panic("uvm_mmap() variant unsupported");
    410 
    411 	if (RUMP_LOCALPROC_P(curproc)) {
    412 		uaddr = rumpuser_anonmmap(NULL, size, 0, 0, &error);
    413 	} else {
    414 		error = rumpuser_sp_anonmmap(curproc->p_vmspace->vm_map.pmap,
    415 		    size, &uaddr);
    416 	}
    417 	if (uaddr == NULL)
    418 		return error;
    419 
    420 	*addr = (vaddr_t)uaddr;
    421 	return 0;
    422 }
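         /*
          * In other words, the only call shape accepted here is the one
          * used by proplib's mmap hack, roughly (a sketch, not a verbatim
          * caller):
          *
          *	vaddr_t va = 0;
          *	error = uvm_mmap(&curproc->p_vmspace->vm_map, &va, len,
          *	    VM_PROT_READ | VM_PROT_WRITE,
          *	    VM_PROT_READ | VM_PROT_WRITE,
          *	    MAP_PRIVATE | MAP_ANON, NULL, 0, 0);
          *
          * Anything else panics by design.
          */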
    423 
    424 struct pagerinfo {
    425 	vaddr_t pgr_kva;
    426 	int pgr_npages;
    427 	struct vm_page **pgr_pgs;
    428 	bool pgr_read;
    429 
    430 	LIST_ENTRY(pagerinfo) pgr_entries;
    431 };
    432 static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist);
    433 
    434 /*
    435  * Pager "map" in routine.  Instead of mapping, we allocate memory
    436  * and copy page contents there.  Not optimal or even strictly
    437  * correct (the caller might modify the page contents after mapping
    438  * them in), but what the heck.  Assumes UVMPAGER_MAPIN_WAITOK.
    439  */
    440 vaddr_t
    441 uvm_pagermapin(struct vm_page **pgs, int npages, int flags)
    442 {
    443 	struct pagerinfo *pgri;
    444 	vaddr_t curkva;
    445 	int i;
    446 
    447 	/* allocate structures */
    448 	pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP);
    449 	pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP);
    450 	pgri->pgr_npages = npages;
    451 	pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP);
    452 	pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0;
    453 
    454 	/* copy contents to "mapped" memory */
    455 	for (i = 0, curkva = pgri->pgr_kva;
    456 	    i < npages;
    457 	    i++, curkva += PAGE_SIZE) {
    458 		/*
     459 		 * We need to copy the previous contents of the pages
     460 		 * into the window even if we are reading from the
     461 		 * device, since the device might not fill the full
     462 		 * mapped range and we would otherwise corrupt data
     463 		 * when we unmap the window.
    464 		 */
    465 		memcpy((void*)curkva, pgs[i]->uanon, PAGE_SIZE);
    466 		pgri->pgr_pgs[i] = pgs[i];
    467 	}
    468 
    469 	mutex_enter(&pagermtx);
    470 	LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries);
    471 	mutex_exit(&pagermtx);
    472 
    473 	return pgri->pgr_kva;
    474 }
    475 
    476 /*
     477  * Map out the pager window: copy the contents back from the VA
     478  * window to page storage and free the associated structures.
    479  *
    480  * Note: does not currently support partial frees
    481  */
    482 void
    483 uvm_pagermapout(vaddr_t kva, int npages)
    484 {
    485 	struct pagerinfo *pgri;
    486 	vaddr_t curkva;
    487 	int i;
    488 
    489 	mutex_enter(&pagermtx);
    490 	LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
    491 		if (pgri->pgr_kva == kva)
    492 			break;
    493 	}
    494 	KASSERT(pgri);
    495 	if (pgri->pgr_npages != npages)
    496 		panic("uvm_pagermapout: partial unmapping not supported");
    497 	LIST_REMOVE(pgri, pgr_entries);
    498 	mutex_exit(&pagermtx);
    499 
    500 	if (pgri->pgr_read) {
    501 		for (i = 0, curkva = pgri->pgr_kva;
    502 		    i < pgri->pgr_npages;
    503 		    i++, curkva += PAGE_SIZE) {
    504 			memcpy(pgri->pgr_pgs[i]->uanon,(void*)curkva,PAGE_SIZE);
    505 		}
    506 	}
    507 
    508 	kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *));
    509 	kmem_free((void*)pgri->pgr_kva, npages * PAGE_SIZE);
    510 	kmem_free(pgri, sizeof(*pgri));
    511 }
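         /*
          * Round-trip sketch of how a pager might use the two routines
          * above (illustrative only; the real callers live in the vnode
          * pager paths):
          *
          *	kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK);
          *	... perform I/O against the contiguous window at kva ...
          *	uvm_pagermapout(kva, npages);
          *
          * For a read (UVMPAGER_MAPIN_READ), it is mapout that copies the
          * data from the window back into the storage hanging off
          * pg->uanon.
          */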
    512 
    513 /*
    514  * convert va in pager window to page structure.
    515  * XXX: how expensive is this (global lock, list traversal)?
    516  */
    517 struct vm_page *
    518 uvm_pageratop(vaddr_t va)
    519 {
    520 	struct pagerinfo *pgri;
    521 	struct vm_page *pg = NULL;
    522 	int i;
    523 
    524 	mutex_enter(&pagermtx);
    525 	LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
    526 		if (pgri->pgr_kva <= va
    527 		    && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE)
    528 			break;
    529 	}
    530 	if (pgri) {
    531 		i = (va - pgri->pgr_kva) >> PAGE_SHIFT;
    532 		pg = pgri->pgr_pgs[i];
    533 	}
    534 	mutex_exit(&pagermtx);
    535 
    536 	return pg;
    537 }
    538 
    539 /*
    540  * Called with the vm object locked.
    541  *
    542  * Put vnode object pages at the end of the access queue to indicate
    543  * they have been recently accessed and should not be immediate
    544  * candidates for pageout.  Do not do this for lookups done by
    545  * the pagedaemon to mimic pmap_kentered mappings which don't track
    546  * access information.
    547  */
    548 struct vm_page *
    549 uvm_pagelookup(struct uvm_object *uobj, voff_t off)
    550 {
    551 	struct vm_page *pg;
    552 	bool ispagedaemon = curlwp == uvm.pagedaemon_lwp;
    553 
    554 	pg = rb_tree_find_node(&uobj->rb_tree, &off);
    555 	if (pg && !UVM_OBJ_IS_AOBJ(pg->uobject) && !ispagedaemon) {
    556 		mutex_enter(&uvm_pageqlock);
    557 		TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
    558 		TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
    559 		mutex_exit(&uvm_pageqlock);
    560 	}
    561 
    562 	return pg;
    563 }
    564 
    565 void
    566 uvm_page_unbusy(struct vm_page **pgs, int npgs)
    567 {
    568 	struct vm_page *pg;
    569 	int i;
    570 
    571 	KASSERT(npgs > 0);
    572 	KASSERT(mutex_owned(&pgs[0]->uobject->vmobjlock));
    573 
    574 	for (i = 0; i < npgs; i++) {
    575 		pg = pgs[i];
    576 		if (pg == NULL)
    577 			continue;
    578 
    579 		KASSERT(pg->flags & PG_BUSY);
    580 		if (pg->flags & PG_WANTED)
    581 			wakeup(pg);
    582 		if (pg->flags & PG_RELEASED)
    583 			uvm_pagefree(pg);
    584 		else
    585 			pg->flags &= ~(PG_WANTED|PG_BUSY);
    586 	}
    587 }
    588 
    589 void
    590 uvm_estimatepageable(int *active, int *inactive)
    591 {
    592 
    593 	/* XXX: guessing game */
    594 	*active = 1024;
    595 	*inactive = 1024;
    596 }
    597 
    598 struct vm_map_kernel *
    599 vm_map_to_kernel(struct vm_map *map)
    600 {
    601 
    602 	return (struct vm_map_kernel *)map;
    603 }
    604 
    605 bool
    606 vm_map_starved_p(struct vm_map *map)
    607 {
    608 
    609 	if (map->flags & VM_MAP_WANTVA)
    610 		return true;
    611 
    612 	return false;
    613 }
    614 
    615 int
    616 uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
    617 {
    618 
    619 	panic("%s: unimplemented", __func__);
    620 }
    621 
    622 void
    623 uvm_unloan(void *v, int npages, int flags)
    624 {
    625 
    626 	panic("%s: unimplemented", __func__);
    627 }
    628 
    629 int
    630 uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    631 	struct vm_page **opp)
    632 {
    633 
    634 	return EBUSY;
    635 }
    636 
    637 #ifdef DEBUGPRINT
    638 void
    639 uvm_object_printit(struct uvm_object *uobj, bool full,
    640 	void (*pr)(const char *, ...))
    641 {
    642 
    643 	pr("VM OBJECT at %p, refs %d", uobj, uobj->uo_refs);
    644 }
    645 #endif
    646 
    647 vaddr_t
    648 uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz)
    649 {
    650 
    651 	return 0;
    652 }
    653 
    654 int
    655 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
    656 	vm_prot_t prot, bool set_max)
    657 {
    658 
    659 	return EOPNOTSUPP;
    660 }
    661 
    662 /*
    663  * UVM km
    664  */
    665 
    666 vaddr_t
    667 uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
    668 {
    669 	void *rv, *desired = NULL;
    670 	int alignbit, error;
    671 
    672 #ifdef __x86_64__
    673 	/*
    674 	 * On amd64, allocate all module memory from the lowest 2GB.
    675 	 * This is because NetBSD kernel modules are compiled
    676 	 * with -mcmodel=kernel and reserve only 4 bytes for
    677 	 * offsets.  If we load code compiled with -mcmodel=kernel
    678 	 * anywhere except the lowest or highest 2GB, it will not
    679 	 * work.  Since userspace does not have access to the highest
    680 	 * 2GB, use the lowest 2GB.
    681 	 *
    682 	 * Note: this assumes the rump kernel resides in
    683 	 * the lowest 2GB as well.
    684 	 *
     685 	 * Note2: yes, it's a quick hack, but since this is the only
    686 	 * place where we care about the map we're allocating from,
    687 	 * just use a simple "if" instead of coming up with a fancy
    688 	 * generic solution.
    689 	 */
    690 	extern struct vm_map *module_map;
    691 	if (map == module_map) {
    692 		desired = (void *)(0x80000000 - size);
    693 	}
    694 #endif
    695 
    696 	alignbit = 0;
    697 	if (align) {
    698 		alignbit = ffs(align)-1;
    699 	}
    700 
    701 	rv = rumpuser_anonmmap(desired, size, alignbit, flags & UVM_KMF_EXEC,
    702 	    &error);
    703 	if (rv == NULL) {
    704 		if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT))
    705 			return 0;
    706 		else
    707 			panic("uvm_km_alloc failed");
    708 	}
    709 
    710 	if (flags & UVM_KMF_ZERO)
    711 		memset(rv, 0, size);
    712 
    713 	return (vaddr_t)rv;
    714 }
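         /*
          * Usage sketch (illustrative): a caller that wants zeroed,
          * executable memory and can tolerate failure could do
          *
          *	va = uvm_km_alloc(module_map, size, PAGE_SIZE,
          *	    UVM_KMF_EXEC | UVM_KMF_ZERO | UVM_KMF_CANFAIL);
          *	if (va == 0)
          *		return ENOMEM;
          *
          * The allocation is simply backed by anonymous host memory via
          * rumpuser_anonmmap(); the alignment is passed down as a
          * power-of-two bit position.
          */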
    715 
    716 void
    717 uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
    718 {
    719 
    720 	rumpuser_unmap((void *)vaddr, size);
    721 }
    722 
    723 struct vm_map *
    724 uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
    725 	vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
    726 {
    727 
    728 	return (struct vm_map *)417416;
    729 }
    730 
    731 vaddr_t
    732 uvm_km_alloc_poolpage(struct vm_map *map, bool waitok)
    733 {
    734 
    735 	return (vaddr_t)rump_hypermalloc(PAGE_SIZE, PAGE_SIZE,
    736 	    waitok, "kmalloc");
    737 }
    738 
    739 void
    740 uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr)
    741 {
    742 
    743 	rump_hyperfree((void *)addr, PAGE_SIZE);
    744 }
    745 
    746 vaddr_t
    747 uvm_km_alloc_poolpage_cache(struct vm_map *map, bool waitok)
    748 {
    749 
    750 	return uvm_km_alloc_poolpage(map, waitok);
    751 }
    752 
    753 void
    754 uvm_km_free_poolpage_cache(struct vm_map *map, vaddr_t vaddr)
    755 {
    756 
    757 	uvm_km_free_poolpage(map, vaddr);
    758 }
    759 
    760 void
    761 uvm_km_va_drain(struct vm_map *map, uvm_flag_t flags)
    762 {
    763 
     764 	/* we may eventually want some model for available memory */
    765 }
    766 
    767 /*
    768  * VM space locking routines.  We don't really have to do anything,
    769  * since the pages are always "wired" (both local and remote processes).
    770  */
    771 int
    772 uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access)
    773 {
    774 
    775 	return 0;
    776 }
    777 
    778 void
    779 uvm_vsunlock(struct vmspace *vs, void *addr, size_t len)
    780 {
    781 
    782 }
    783 
    784 /*
    785  * For the local case the buffer mappers don't need to do anything.
    786  * For the remote case we need to reserve space and copy data in or
    787  * out, depending on B_READ/B_WRITE.
    788  */
    789 int
    790 vmapbuf(struct buf *bp, vsize_t len)
    791 {
    792 	int error = 0;
    793 
    794 	bp->b_saveaddr = bp->b_data;
    795 
    796 	/* remote case */
    797 	if (!RUMP_LOCALPROC_P(curproc)) {
    798 		bp->b_data = rump_hypermalloc(len, 0, true, "vmapbuf");
    799 		if (BUF_ISWRITE(bp)) {
    800 			error = copyin(bp->b_saveaddr, bp->b_data, len);
    801 			if (error) {
    802 				rump_hyperfree(bp->b_data, len);
    803 				bp->b_data = bp->b_saveaddr;
    804 				bp->b_saveaddr = 0;
    805 			}
    806 		}
    807 	}
    808 
    809 	return error;
    810 }
    811 
    812 void
    813 vunmapbuf(struct buf *bp, vsize_t len)
    814 {
    815 
    816 	/* remote case */
    817 	if (!RUMP_LOCALPROC_P(bp->b_proc)) {
    818 		if (BUF_ISREAD(bp)) {
    819 			bp->b_error = copyout_proc(bp->b_proc,
    820 			    bp->b_data, bp->b_saveaddr, len);
    821 		}
    822 		rump_hyperfree(bp->b_data, len);
    823 	}
    824 
    825 	bp->b_data = bp->b_saveaddr;
    826 	bp->b_saveaddr = 0;
    827 }
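         /*
          * These follow the usual physio()-style sequence: vmapbuf() is
          * called before the I/O is started and vunmapbuf() after it
          * completes.  For a remote (sysproxy) client the buffer is staged
          * in hypervisor memory and copied across the process boundary
          * with copyin()/copyout_proc() as appropriate.
          */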
    828 
    829 void
    830 uvmspace_addref(struct vmspace *vm)
    831 {
    832 
    833 	/*
    834 	 * No dynamically allocated vmspaces exist.
    835 	 */
    836 }
    837 
    838 void
    839 uvmspace_free(struct vmspace *vm)
    840 {
    841 
    842 	/* nothing for now */
    843 }
    844 
    845 /*
     846  * Page life cycle stuff.  It really doesn't exist, so these are just stubs.
    847  */
    848 
    849 void
    850 uvm_pageactivate(struct vm_page *pg)
    851 {
    852 
    853 	/* nada */
    854 }
    855 
    856 void
    857 uvm_pagedeactivate(struct vm_page *pg)
    858 {
    859 
    860 	/* nada */
    861 }
    862 
    863 void
    864 uvm_pagedequeue(struct vm_page *pg)
    865 {
    866 
     867 	/* nada */
    868 }
    869 
    870 void
    871 uvm_pageenqueue(struct vm_page *pg)
    872 {
    873 
    874 	/* nada */
    875 }
    876 
    877 void
    878 uvmpdpol_anfree(struct vm_anon *an)
    879 {
    880 
    881 	/* nada */
    882 }
    883 
    884 /*
    885  * Physical address accessors.
    886  */
    887 
    888 struct vm_page *
    889 uvm_phys_to_vm_page(paddr_t pa)
    890 {
    891 
    892 	return NULL;
    893 }
    894 
    895 paddr_t
    896 uvm_vm_page_to_phys(const struct vm_page *pg)
    897 {
    898 
    899 	return 0;
    900 }
    901 
    902 /*
    903  * Routines related to the Page Baroness.
    904  */
    905 
    906 void
    907 uvm_wait(const char *msg)
    908 {
    909 
    910 	if (__predict_false(curlwp == uvm.pagedaemon_lwp))
    911 		panic("pagedaemon out of memory");
    912 	if (__predict_false(rump_threads == 0))
    913 		panic("pagedaemon missing (RUMP_THREADS = 0)");
    914 
    915 	mutex_enter(&pdaemonmtx);
    916 	pdaemon_waiters++;
    917 	cv_signal(&pdaemoncv);
    918 	cv_wait(&oomwait, &pdaemonmtx);
    919 	mutex_exit(&pdaemonmtx);
    920 }
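         /*
          * The handshake with the pagedaemon works roughly as follows:
          * a starved caller bumps pdaemon_waiters, signals pdaemoncv and
          * sleeps on oomwait.  uvm_pageout() below wakes the sleepers with
          * cv_broadcast(&oomwait) once a reclaim pass succeeds; as long as
          * waiters remain it keeps retrying instead of going back to sleep.
          */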
    921 
    922 void
    923 uvm_pageout_start(int npages)
    924 {
    925 
    926 	/* we don't have the heuristics */
    927 }
    928 
    929 void
    930 uvm_pageout_done(int npages)
    931 {
    932 
    933 	/* could wakeup waiters, but just let the pagedaemon do it */
    934 }
    935 
    936 static bool
    937 processpage(struct vm_page *pg, bool *lockrunning)
    938 {
    939 	struct uvm_object *uobj;
    940 
    941 	uobj = pg->uobject;
    942 	if (mutex_tryenter(&uobj->vmobjlock)) {
    943 		if ((pg->flags & PG_BUSY) == 0) {
    944 			mutex_exit(&uvm_pageqlock);
    945 			uobj->pgops->pgo_put(uobj, pg->offset,
    946 			    pg->offset + PAGE_SIZE,
    947 			    PGO_CLEANIT|PGO_FREE);
    948 			KASSERT(!mutex_owned(&uobj->vmobjlock));
    949 			return true;
    950 		} else {
    951 			mutex_exit(&uobj->vmobjlock);
    952 		}
    953 	} else if (*lockrunning == false && ncpu > 1) {
    954 		CPU_INFO_ITERATOR cii;
    955 		struct cpu_info *ci;
    956 		struct lwp *l;
    957 
    958 		l = mutex_owner(&uobj->vmobjlock);
    959 		for (CPU_INFO_FOREACH(cii, ci)) {
    960 			if (ci->ci_curlwp == l) {
    961 				*lockrunning = true;
    962 				break;
    963 			}
    964 		}
    965 	}
    966 
    967 	return false;
    968 }
    969 
    970 /*
    971  * The Diabolical pageDaemon Director (DDD).
    972  */
    973 void
    974 uvm_pageout(void *arg)
    975 {
    976 	struct vm_page *pg;
    977 	struct pool *pp, *pp_first;
    978 	uint64_t where;
    979 	int timo = 0;
    980 	int cleaned, skip, skipped;
    981 	bool succ = false;
    982 	bool lockrunning;
    983 
    984 	mutex_enter(&pdaemonmtx);
    985 	for (;;) {
    986 		if (succ) {
    987 			kernel_map->flags &= ~VM_MAP_WANTVA;
    988 			kmem_map->flags &= ~VM_MAP_WANTVA;
    989 			timo = 0;
    990 			if (pdaemon_waiters) {
    991 				pdaemon_waiters = 0;
    992 				cv_broadcast(&oomwait);
    993 			}
    994 		}
    995 		succ = false;
    996 
    997 		if (pdaemon_waiters == 0) {
    998 			cv_timedwait(&pdaemoncv, &pdaemonmtx, timo);
    999 			uvmexp.pdwoke++;
   1000 		}
   1001 
   1002 		/* tell the world that we are hungry */
   1003 		kernel_map->flags |= VM_MAP_WANTVA;
   1004 		kmem_map->flags |= VM_MAP_WANTVA;
   1005 
   1006 		if (pdaemon_waiters == 0 && !NEED_PAGEDAEMON())
   1007 			continue;
   1008 		mutex_exit(&pdaemonmtx);
   1009 
   1010 		/*
   1011 		 * step one: reclaim the page cache.  this should give
   1012 		 * us the biggest earnings since whole pages are released
   1013 		 * into backing memory.
   1014 		 */
   1015 		pool_cache_reclaim(&pagecache);
   1016 		if (!NEED_PAGEDAEMON()) {
   1017 			succ = true;
   1018 			mutex_enter(&pdaemonmtx);
   1019 			continue;
   1020 		}
   1021 
   1022 		/*
   1023 		 * Ok, so that didn't help.  Next, try to hunt memory
   1024 		 * by pushing out vnode pages.  The pages might contain
   1025 		 * useful cached data, but we need the memory.
   1026 		 */
   1027 		cleaned = 0;
   1028 		skip = 0;
   1029 		lockrunning = false;
   1030  again:
   1031 		mutex_enter(&uvm_pageqlock);
   1032 		while (cleaned < PAGEDAEMON_OBJCHUNK) {
   1033 			skipped = 0;
   1034 			TAILQ_FOREACH(pg, &vmpage_lruqueue, pageq.queue) {
   1035 
   1036 				/*
   1037 				 * skip over pages we _might_ have tried
   1038 				 * to handle earlier.  they might not be
   1039 				 * exactly the same ones, but I'm not too
   1040 				 * concerned.
   1041 				 */
    1042 				if (skipped++ < skip)
    1043 					continue;
   1044 
   1045 				if (processpage(pg, &lockrunning)) {
   1046 					cleaned++;
   1047 					goto again;
   1048 				}
   1049 
   1050 				skip++;
   1051 			}
   1052 			break;
   1053 		}
   1054 		mutex_exit(&uvm_pageqlock);
   1055 
   1056 		/*
    1057 		 * If we cleaned nothing and someone is running with an
    1058 		 * object lock held, yield the host CPU to make sure that
    1059 		 * thread is not parked on the host.  Since sched_yield()
    1060 		 * doesn't appear to do anything on NetBSD, nanosleep
    1061 		 * for the smallest possible time and hope we're back in
    1062 		 * the game soon.
   1063 		 */
   1064 		if (cleaned == 0 && lockrunning) {
   1065 			uint64_t sec, nsec;
   1066 
   1067 			sec = 0;
   1068 			nsec = 1;
   1069 			rumpuser_nanosleep(&sec, &nsec, NULL);
   1070 
   1071 			lockrunning = false;
   1072 			skip = 0;
   1073 
   1074 			/* and here we go again */
   1075 			goto again;
   1076 		}
   1077 
   1078 		/*
   1079 		 * And of course we need to reclaim the page cache
   1080 		 * again to actually release memory.
   1081 		 */
   1082 		pool_cache_reclaim(&pagecache);
   1083 		if (!NEED_PAGEDAEMON()) {
   1084 			succ = true;
   1085 			mutex_enter(&pdaemonmtx);
   1086 			continue;
   1087 		}
   1088 
   1089 		/*
   1090 		 * Still not there?  sleeves come off right about now.
   1091 		 * First: do reclaim on kernel/kmem map.
   1092 		 */
   1093 		callback_run_roundrobin(&kernel_map_store.vmk_reclaim_callback,
   1094 		    NULL);
   1095 		callback_run_roundrobin(&kmem_map_store.vmk_reclaim_callback,
   1096 		    NULL);
   1097 
   1098 		/*
   1099 		 * And then drain the pools.  Wipe them out ... all of them.
   1100 		 */
   1101 
   1102 		pool_drain_start(&pp_first, &where);
   1103 		pp = pp_first;
   1104 		for (;;) {
   1105 			rump_vfs_drainbufs(10 /* XXX: estimate better */);
   1106 			succ = pool_drain_end(pp, where);
   1107 			if (succ)
   1108 				break;
   1109 			pool_drain_start(&pp, &where);
   1110 			if (pp == pp_first) {
   1111 				succ = pool_drain_end(pp, where);
   1112 				break;
   1113 			}
   1114 		}
   1115 
   1116 		/*
   1117 		 * Need to use PYEC on our bag of tricks.
   1118 		 * Unfortunately, the wife just borrowed it.
   1119 		 */
   1120 
   1121 		if (!succ && cleaned == 0) {
   1122 			rumpuser_dprintf("pagedaemoness: failed to reclaim "
   1123 			    "memory ... sleeping (deadlock?)\n");
   1124 			timo = hz;
   1125 		}
   1126 
   1127 		mutex_enter(&pdaemonmtx);
   1128 	}
   1129 
   1130 	panic("you can swap out any time you like, but you can never leave");
   1131 }
   1132 
   1133 void
    1134 uvm_kick_pdaemon(void)
   1135 {
   1136 
   1137 	/*
   1138 	 * Wake up the diabolical pagedaemon director if we are over
   1139 	 * 90% of the memory limit.  This is a complete and utter
    1140 	 * Stetson-Harrison decision, which you are allowed to fine-tune.
   1141 	 * Don't bother locking.  If we have some unflushed caches,
   1142 	 * other waker-uppers will deal with the issue.
   1143 	 */
   1144 	if (NEED_PAGEDAEMON()) {
   1145 		cv_signal(&pdaemoncv);
   1146 	}
   1147 }
   1148 
   1149 void *
   1150 rump_hypermalloc(size_t howmuch, int alignment, bool waitok, const char *wmsg)
   1151 {
   1152 	unsigned long newmem;
   1153 	void *rv;
   1154 
   1155 	uvm_kick_pdaemon(); /* ouch */
   1156 
   1157 	/* first we must be within the limit */
   1158  limitagain:
   1159 	if (rump_physmemlimit != RUMPMEM_UNLIMITED) {
   1160 		newmem = atomic_add_long_nv(&curphysmem, howmuch);
   1161 		if (newmem > rump_physmemlimit) {
   1162 			newmem = atomic_add_long_nv(&curphysmem, -howmuch);
   1163 			if (!waitok) {
   1164 				return NULL;
   1165 			}
   1166 			uvm_wait(wmsg);
   1167 			goto limitagain;
   1168 		}
   1169 	}
   1170 
   1171 	/* second, we must get something from the backend */
   1172  again:
   1173 	rv = rumpuser_malloc(howmuch, alignment);
   1174 	if (__predict_false(rv == NULL && waitok)) {
   1175 		uvm_wait(wmsg);
   1176 		goto again;
   1177 	}
   1178 
   1179 	return rv;
   1180 }
   1181 
   1182 void
   1183 rump_hyperfree(void *what, size_t size)
   1184 {
   1185 
   1186 	if (rump_physmemlimit != RUMPMEM_UNLIMITED) {
   1187 		atomic_add_long(&curphysmem, -size);
   1188 	}
   1189 	rumpuser_free(what);
   1190 }
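         /*
          * The two hypercalls above are intended to be used as a pair,
          * e.g. (sketch):
          *
          *	void *mem = rump_hypermalloc(len, 0, true, "example");
          *	... use mem ...
          *	rump_hyperfree(mem, len);
          *
          * The caller must remember the size, since accounting against
          * rump_physmemlimit happens here rather than in the hypervisor.
          */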
   1191