      1 /*	$NetBSD: vm.c,v 1.120.2.10 2014/05/22 11:41:15 yamt Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2007-2011 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Development of this software was supported by
      7  * The Finnish Cultural Foundation and the Research Foundation of
      8  * The Helsinki University of Technology.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     20  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     25  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Virtual memory emulation routines.
     34  */
     35 
     36 /*
     37  * XXX: we abuse pg->uanon for the virtual address of the storage
     38  * for each page.  phys_addr would fit the job description better,
      39  * except that it would create unnecessary lossage on some platforms
     40  * due to not being a pointer type.
     41  */
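         /*
          * Concretely, code that needs the backing storage just casts the
          * pointer back: uvm_pagezero() below, for example, does
          * memset((void *)pg->uanon, 0, PAGE_SIZE).
          */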
     42 
     43 #include <sys/cdefs.h>
     44 __KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.120.2.10 2014/05/22 11:41:15 yamt Exp $");
     45 
     46 #include <sys/param.h>
     47 #include <sys/atomic.h>
     48 #include <sys/buf.h>
     49 #include <sys/kernel.h>
     50 #include <sys/kmem.h>
     51 #include <sys/vmem.h>
     52 #include <sys/mman.h>
     53 #include <sys/null.h>
     54 #include <sys/vnode.h>
     55 
     56 #include <machine/pmap.h>
     57 
     58 #include <rump/rumpuser.h>
     59 
     60 #include <uvm/uvm.h>
     61 #include <uvm/uvm_ddb.h>
     62 #include <uvm/uvm_pdpolicy.h>
     63 #include <uvm/uvm_prot.h>
     64 #include <uvm/uvm_readahead.h>
     65 
     66 #include "rump_private.h"
     67 #include "rump_vfs_private.h"
     68 
     69 kmutex_t uvm_pageqlock; /* non-free page lock */
     70 kmutex_t uvm_fpageqlock; /* free page lock, non-gpl license */
     71 kmutex_t uvm_swap_data_lock;
     72 
     73 struct uvmexp uvmexp;
     74 struct uvm uvm;
     75 
     76 #ifdef __uvmexp_pagesize
     77 const int * const uvmexp_pagesize = &uvmexp.pagesize;
     78 const int * const uvmexp_pagemask = &uvmexp.pagemask;
     79 const int * const uvmexp_pageshift = &uvmexp.pageshift;
     80 #endif
     81 
     82 struct vm_map rump_vmmap;
     83 
     84 static struct vm_map kernel_map_store;
     85 struct vm_map *kernel_map = &kernel_map_store;
     86 
     87 static struct vm_map module_map_store;
     88 extern struct vm_map *module_map;
     89 
     90 vmem_t *kmem_arena;
     91 vmem_t *kmem_va_arena;
     92 
     93 static unsigned int pdaemon_waiters;
     94 static kmutex_t pdaemonmtx;
     95 static kcondvar_t pdaemoncv, oomwait;
     96 
     97 unsigned long rump_physmemlimit = RUMPMEM_UNLIMITED;
     98 static unsigned long pdlimit = RUMPMEM_UNLIMITED; /* page daemon memlimit */
     99 static unsigned long curphysmem;
    100 static unsigned long dddlim;		/* 90% of memory limit used */
    101 #define NEED_PAGEDAEMON() \
    102     (rump_physmemlimit != RUMPMEM_UNLIMITED && curphysmem > dddlim)
    103 
    104 /*
     105  * Try to free two pages' worth of pages from objects.
     106  * If this successfully frees a full page cache page, we'll
    107  * free the released page plus PAGE_SIZE/sizeof(vm_page).
    108  */
    109 #define PAGEDAEMON_OBJCHUNK (2*PAGE_SIZE / sizeof(struct vm_page))
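         /*
          * For example, with 4kB pages this comes to on the order of a
          * hundred page structures per pass; the exact figure depends on
          * sizeof(struct vm_page) on the platform.
          */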
    110 
    111 /*
    112  * Keep a list of least recently used pages.  Since the only way a
    113  * rump kernel can "access" a page is via lookup, we put the page
     114  * at the back of the queue every time a lookup for it is done.  If the
     115  * page is at the front of this global queue and we're short of memory,
    116  * it's a candidate for pageout.
    117  */
    118 static struct pglist vmpage_lruqueue;
    119 static unsigned vmpage_onqueue;
    120 
    121 /*
    122  * vm pages
    123  */
    124 
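         /*
          * vm_page structures come from a pool cache that is bootstrapped at
          * the end of uvm_init().  The constructor below attaches a page of
          * hypervisor memory as backing storage and records its address in
          * pg->uanon; the destructor releases that storage again.
          */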
    125 static int
    126 pgctor(void *arg, void *obj, int flags)
    127 {
    128 	struct vm_page *pg = obj;
    129 
    130 	memset(pg, 0, sizeof(*pg));
    131 	pg->uanon = rump_hypermalloc(PAGE_SIZE, PAGE_SIZE,
    132 	    (flags & PR_WAITOK) == PR_WAITOK, "pgalloc");
    133 	return pg->uanon == NULL;
    134 }
    135 
    136 static void
    137 pgdtor(void *arg, void *obj)
    138 {
    139 	struct vm_page *pg = obj;
    140 
    141 	rump_hyperfree(pg->uanon, PAGE_SIZE);
    142 }
    143 
    144 static struct pool_cache pagecache;
    145 
    146 /*
    147  * Called with the object locked.  We don't support anons.
    148  */
    149 struct vm_page *
    150 uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon,
    151 	int flags, int strat, int free_list)
    152 {
    153 	struct vm_page *pg;
    154 	int error;
    155 
    156 	KASSERT(uobj && mutex_owned(uobj->vmobjlock));
    157 	KASSERT(anon == NULL);
    158 
    159 	pg = pool_cache_get(&pagecache, PR_NOWAIT);
    160 	if (__predict_false(pg == NULL)) {
    161 		return NULL;
    162 	}
    163 
    164 	pg->offset = off;
    165 	pg->uobject = uobj;
    166 
    167 	pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;
    168 	if (flags & UVM_PGA_ZERO) {
    169 		uvm_pagezero(pg);
    170 	}
    171 
    172 	error = radix_tree_insert_node(&uobj->uo_pages,
    173 	    pg->offset >> PAGE_SHIFT, pg);
    174 	KASSERT(error == 0);
    175 
    176 	/*
     177 	 * Don't put pages belonging to anonymous objects on the LRU page
     178 	 * queue.  We can't flush them (there's no concept of swap in a
     179 	 * rump kernel), so there's no reason to bother with them.
    180 	 */
    181 	if (!UVM_OBJ_IS_AOBJ(uobj)) {
    182 		atomic_inc_uint(&vmpage_onqueue);
    183 		mutex_enter(&uvm_pageqlock);
    184 		TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
    185 		mutex_exit(&uvm_pageqlock);
    186 	}
    187 
    188 	uobj->uo_npages++;
    189 
    190 	return pg;
    191 }
    192 
    193 /*
    194  * Release a page.
    195  *
    196  * Called with the vm object locked.
    197  */
    198 void
    199 uvm_pagefree(struct vm_page *pg)
    200 {
    201 	struct uvm_object *uobj = pg->uobject;
    202 	struct vm_page *opg;
    203 
    204 	KASSERT(mutex_owned(&uvm_pageqlock));
    205 	KASSERT(mutex_owned(uobj->vmobjlock));
    206 
    207 	if (pg->flags & PG_WANTED)
    208 		wakeup(pg);
    209 
    210 	uobj->uo_npages--;
    211 	opg = radix_tree_remove_node(&uobj->uo_pages, pg->offset >> PAGE_SHIFT);
    212 	KASSERT(pg == opg);
    213 
    214 	if (!UVM_OBJ_IS_AOBJ(uobj)) {
    215 		TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
    216 		atomic_dec_uint(&vmpage_onqueue);
    217 	}
    218 
    219 	pool_cache_put(&pagecache, pg);
    220 }
    221 
    222 void
    223 uvm_pagezero(struct vm_page *pg)
    224 {
    225 
    226 	uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
    227 	memset((void *)pg->uanon, 0, PAGE_SIZE);
    228 }
    229 
    230 /*
    231  * uvm_page_locked_p: return true if object associated with page is
    232  * locked.  this is a weak check for runtime assertions only.
    233  */
    234 
    235 bool
    236 uvm_page_locked_p(struct vm_page *pg)
    237 {
    238 
    239 	return mutex_owned(pg->uobject->vmobjlock);
    240 }
    241 
    242 /*
    243  * Misc routines
    244  */
    245 
    246 static kmutex_t pagermtx;
    247 
    248 void
    249 uvm_init(void)
    250 {
    251 	char buf[64];
    252 
    253 	if (rumpuser_getparam("RUMP_MEMLIMIT", buf, sizeof(buf)) == 0) {
    254 		unsigned long tmp;
    255 		char *ep;
    256 		int mult;
    257 
    258 		tmp = strtoul(buf, &ep, 10);
    259 		if (strlen(ep) > 1)
    260 			panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);
    261 
    262 		/* mini-dehumanize-number */
    263 		mult = 1;
    264 		switch (*ep) {
    265 		case 'k':
    266 			mult = 1024;
    267 			break;
    268 		case 'm':
    269 			mult = 1024*1024;
    270 			break;
    271 		case 'g':
    272 			mult = 1024*1024*1024;
    273 			break;
    274 		case 0:
    275 			break;
    276 		default:
    277 			panic("uvm_init: invalid RUMP_MEMLIMIT: %s", buf);
    278 		}
    279 		rump_physmemlimit = tmp * mult;
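         		/*
         		 * e.g. RUMP_MEMLIMIT=16m parses as tmp=16, mult=1024*1024,
         		 * i.e. a 16MB limit (before the pager reserve below).
         		 */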
    280 
    281 		if (rump_physmemlimit / mult != tmp)
    282 			panic("uvm_init: RUMP_MEMLIMIT overflow: %s", buf);
    283 		/* it's not like we'd get far with, say, 1 byte, but ... */
    284 		if (rump_physmemlimit < 1024*1024)
    285 			printf("uvm_init: WARNING: <1MB RAM limit, "
    286 			    "hope you know what you're doing\n");
    287 
    288 		/* reserve some memory for the pager */
    289 		pdlimit = rump_physmemlimit;
    290 		rump_physmemlimit -= 2*MAXPHYS;
    291 
    292 #define HUMANIZE_BYTES 9
    293 		CTASSERT(sizeof(buf) >= HUMANIZE_BYTES);
    294 		format_bytes(buf, HUMANIZE_BYTES, rump_physmemlimit);
    295 #undef HUMANIZE_BYTES
    296 		dddlim = 9 * (rump_physmemlimit / 10);
    297 	} else {
    298 		strlcpy(buf, "unlimited (host limit)", sizeof(buf));
    299 	}
    300 	aprint_verbose("total memory = %s\n", buf);
    301 
    302 	TAILQ_INIT(&vmpage_lruqueue);
    303 
    304 	uvmexp.free = 1024*1024; /* XXX: arbitrary & not updated */
    305 
    306 #ifndef __uvmexp_pagesize
    307 	uvmexp.pagesize = PAGE_SIZE;
    308 	uvmexp.pagemask = PAGE_MASK;
    309 	uvmexp.pageshift = PAGE_SHIFT;
    310 #else
    311 #define FAKE_PAGE_SHIFT 12
    312 	uvmexp.pageshift = FAKE_PAGE_SHIFT;
    313 	uvmexp.pagesize = 1<<FAKE_PAGE_SHIFT;
    314 	uvmexp.pagemask = (1<<FAKE_PAGE_SHIFT)-1;
    315 #undef FAKE_PAGE_SHIFT
    316 #endif
    317 
    318 	mutex_init(&pagermtx, MUTEX_DEFAULT, IPL_NONE);
    319 	mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, IPL_NONE);
    320 	mutex_init(&uvm_swap_data_lock, MUTEX_DEFAULT, IPL_NONE);
    321 
    322 	/* just to appease linkage */
    323 	mutex_init(&uvm_fpageqlock, MUTEX_SPIN, IPL_VM);
    324 
    325 	mutex_init(&pdaemonmtx, MUTEX_DEFAULT, IPL_NONE);
    326 	cv_init(&pdaemoncv, "pdaemon");
    327 	cv_init(&oomwait, "oomwait");
    328 
    329 	module_map = &module_map_store;
    330 
    331 	kernel_map->pmap = pmap_kernel();
    332 
    333 	pool_subsystem_init();
    334 
    335 	kmem_arena = vmem_create("kmem", 0, 1024*1024, PAGE_SIZE,
    336 	    NULL, NULL, NULL,
    337 	    0, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);
    338 
    339 	vmem_subsystem_init(kmem_arena);
    340 
    341 	kmem_va_arena = vmem_create("kva", 0, 0, PAGE_SIZE,
    342 	    vmem_alloc, vmem_free, kmem_arena,
    343 	    8 * PAGE_SIZE, VM_NOSLEEP | VM_BOOTSTRAP, IPL_VM);
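         	/*
         	 * kmem_va_arena imports its space from kmem_arena; the
         	 * 8 * PAGE_SIZE argument is, if I read vmem_create() right, the
         	 * qcache limit, i.e. allocations of up to 8 pages are served
         	 * from quantum caches.
         	 */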
    344 
    345 	pool_cache_bootstrap(&pagecache, sizeof(struct vm_page), 0, 0, 0,
    346 	    "page$", NULL, IPL_NONE, pgctor, pgdtor, NULL);
    347 }
    348 
    349 void
    350 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax,
    351     bool topdown)
    352 {
    353 
    354 	vm->vm_map.pmap = pmap_kernel();
    355 	vm->vm_refcnt = 1;
    356 }
    357 
    358 void
    359 uvm_pagewire(struct vm_page *pg)
    360 {
    361 
    362 	/* nada */
    363 }
    364 
    365 void
    366 uvm_pageunwire(struct vm_page *pg)
    367 {
    368 
    369 	/* nada */
    370 }
    371 
    372 /* where's your schmonz now? */
    373 #define PUNLIMIT(a)	\
    374 p->p_rlimit[a].rlim_cur = p->p_rlimit[a].rlim_max = RLIM_INFINITY;
    375 void
    376 uvm_init_limits(struct proc *p)
    377 {
    378 
    379 #ifndef DFLSSIZ
    380 #define DFLSSIZ (16*1024*1024)
    381 #endif
    382 	p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
    383 	p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
    384 	PUNLIMIT(RLIMIT_DATA);
    385 	PUNLIMIT(RLIMIT_RSS);
    386 	PUNLIMIT(RLIMIT_AS);
    387 	/* nice, cascade */
    388 }
    389 #undef PUNLIMIT
    390 
    391 /*
    392  * This satisfies the "disgusting mmap hack" used by proplib.
    393  * We probably should grow some more assertables to make sure we're
    394  * not satisfying anything we shouldn't be satisfying.
    395  */
    396 int
    397 uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    398 	vm_prot_t maxprot, int flags, void *handle, voff_t off, vsize_t locklim)
    399 {
    400 	void *uaddr;
    401 	int error;
    402 
    403 	if (prot != (VM_PROT_READ | VM_PROT_WRITE))
    404 		panic("uvm_mmap() variant unsupported");
    405 	if (flags != (MAP_PRIVATE | MAP_ANON))
    406 		panic("uvm_mmap() variant unsupported");
    407 
    408 	/* no reason in particular, but cf. uvm_default_mapaddr() */
    409 	if (*addr != 0)
    410 		panic("uvm_mmap() variant unsupported");
    411 
    412 	if (RUMP_LOCALPROC_P(curproc)) {
    413 		error = rumpuser_anonmmap(NULL, size, 0, 0, &uaddr);
    414 	} else {
    415 		error = rumpuser_sp_anonmmap(curproc->p_vmspace->vm_map.pmap,
    416 		    size, &uaddr);
    417 	}
    418 	if (error)
    419 		return error;
    420 
    421 	*addr = (vaddr_t)uaddr;
    422 	return 0;
    423 }
    424 
    425 struct pagerinfo {
    426 	vaddr_t pgr_kva;
    427 	int pgr_npages;
    428 	struct vm_page **pgr_pgs;
    429 	bool pgr_read;
    430 
    431 	LIST_ENTRY(pagerinfo) pgr_entries;
    432 };
    433 static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist);
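         /*
          * Each pager window created by uvm_pagermapin() is recorded on this
          * list (protected by pagermtx) so that uvm_pagermapout() and
          * uvm_pageratop() can find the window and its pages by kva.
          */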
    434 
    435 /*
    436  * Pager "map" in routine.  Instead of mapping, we allocate memory
    437  * and copy page contents there.  Not optimal or even strictly
    438  * correct (the caller might modify the page contents after mapping
    439  * them in), but what the heck.  Assumes UVMPAGER_MAPIN_WAITOK.
    440  */
    441 vaddr_t
    442 uvm_pagermapin(struct vm_page **pgs, int npages, int flags)
    443 {
    444 	struct pagerinfo *pgri;
    445 	vaddr_t curkva;
    446 	int i;
    447 
    448 	/* allocate structures */
    449 	pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP);
    450 	pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP);
    451 	pgri->pgr_npages = npages;
    452 	pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP);
    453 	pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0;
    454 
    455 	/* copy contents to "mapped" memory */
    456 	for (i = 0, curkva = pgri->pgr_kva;
    457 	    i < npages;
    458 	    i++, curkva += PAGE_SIZE) {
    459 		/*
     460 		 * We need to copy the previous contents of the pages to
     461 		 * the window even if we are reading from the device, since
     462 		 * the device might not fill the full mapped range and we
     463 		 * would otherwise end up corrupting data when we unmap
     464 		 * the window.
    465 		 */
    466 		memcpy((void*)curkva, pgs[i]->uanon, PAGE_SIZE);
    467 		pgri->pgr_pgs[i] = pgs[i];
    468 	}
    469 
    470 	mutex_enter(&pagermtx);
    471 	LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries);
    472 	mutex_exit(&pagermtx);
    473 
    474 	return pgri->pgr_kva;
    475 }
    476 
    477 /*
    478  * map out the pager window.  return contents from VA to page storage
    479  * and free structures.
    480  *
    481  * Note: does not currently support partial frees
    482  */
    483 void
    484 uvm_pagermapout(vaddr_t kva, int npages)
    485 {
    486 	struct pagerinfo *pgri;
    487 	vaddr_t curkva;
    488 	int i;
    489 
    490 	mutex_enter(&pagermtx);
    491 	LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
    492 		if (pgri->pgr_kva == kva)
    493 			break;
    494 	}
    495 	KASSERT(pgri);
    496 	if (pgri->pgr_npages != npages)
    497 		panic("uvm_pagermapout: partial unmapping not supported");
    498 	LIST_REMOVE(pgri, pgr_entries);
    499 	mutex_exit(&pagermtx);
    500 
    501 	if (pgri->pgr_read) {
    502 		for (i = 0, curkva = pgri->pgr_kva;
    503 		    i < pgri->pgr_npages;
    504 		    i++, curkva += PAGE_SIZE) {
    505 			memcpy(pgri->pgr_pgs[i]->uanon,(void*)curkva,PAGE_SIZE);
    506 		}
    507 	}
    508 
    509 	kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *));
    510 	kmem_free((void*)pgri->pgr_kva, npages * PAGE_SIZE);
    511 	kmem_free(pgri, sizeof(*pgri));
    512 }
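         /*
          * A sketch of the intended calling pattern (hypothetical caller; the
          * flag combination is only an example):
          *
          *	kva = uvm_pagermapin(pgs, npages,
          *	    UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK);
          *	... do i/o against the window at kva ...
          *	uvm_pagermapout(kva, npages);
          *
          * The same npages must be passed to both calls, since partial
          * unmapping is not supported.
          */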
    513 
    514 /*
    515  * convert va in pager window to page structure.
    516  * XXX: how expensive is this (global lock, list traversal)?
    517  */
    518 struct vm_page *
    519 uvm_pageratop(vaddr_t va)
    520 {
    521 	struct pagerinfo *pgri;
    522 	struct vm_page *pg = NULL;
    523 	int i;
    524 
    525 	mutex_enter(&pagermtx);
    526 	LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
    527 		if (pgri->pgr_kva <= va
    528 		    && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE)
    529 			break;
    530 	}
    531 	if (pgri) {
    532 		i = (va - pgri->pgr_kva) >> PAGE_SHIFT;
    533 		pg = pgri->pgr_pgs[i];
    534 	}
    535 	mutex_exit(&pagermtx);
    536 
    537 	return pg;
    538 }
    539 
    540 /*
    541  * Called with the vm object locked.
    542  *
    543  * Put vnode object pages at the end of the access queue to indicate
    544  * they have been recently accessed and should not be immediate
     545  * candidates for pageout.  Do not do this for lookups done by
     546  * the pagedaemon: that mimics pmap_kenter()ed mappings, which do
     547  * not track access information.
    548  */
    549 struct vm_page *
    550 uvm_pagelookup(struct uvm_object *uobj, voff_t off)
    551 {
    552 	struct vm_page *pg;
    553 	bool ispagedaemon = curlwp == uvm.pagedaemon_lwp;
    554 
    555 	pg = radix_tree_lookup_node(&uobj->uo_pages, off >> PAGE_SHIFT);
    556 	if (pg && !UVM_OBJ_IS_AOBJ(pg->uobject) && !ispagedaemon) {
    557 		mutex_enter(&uvm_pageqlock);
    558 		TAILQ_REMOVE(&vmpage_lruqueue, pg, pageq.queue);
    559 		TAILQ_INSERT_TAIL(&vmpage_lruqueue, pg, pageq.queue);
    560 		mutex_exit(&uvm_pageqlock);
    561 	}
    562 
    563 	return pg;
    564 }
    565 
    566 void
    567 uvm_page_unbusy(struct vm_page **pgs, int npgs)
    568 {
    569 	struct vm_page *pg;
    570 	int i;
    571 
    572 	KASSERT(npgs > 0);
    573 	KASSERT(mutex_owned(pgs[0]->uobject->vmobjlock));
    574 
    575 	for (i = 0; i < npgs; i++) {
    576 		pg = pgs[i];
    577 		if (pg == NULL)
    578 			continue;
    579 
    580 		KASSERT(pg->flags & PG_BUSY);
    581 		if (pg->flags & PG_WANTED)
    582 			wakeup(pg);
    583 		if (pg->flags & PG_RELEASED)
    584 			uvm_pagefree(pg);
    585 		else
    586 			pg->flags &= ~(PG_WANTED|PG_BUSY);
    587 	}
    588 }
    589 
    590 void
    591 uvm_estimatepageable(int *active, int *inactive)
    592 {
    593 
    594 	/* XXX: guessing game */
    595 	*active = 1024;
    596 	*inactive = 1024;
    597 }
    598 
    599 bool
    600 vm_map_starved_p(struct vm_map *map)
    601 {
    602 
    603 	if (map->flags & VM_MAP_WANTVA)
    604 		return true;
    605 
    606 	return false;
    607 }
    608 
    609 int
    610 uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
    611 {
    612 
    613 	panic("%s: unimplemented", __func__);
    614 }
    615 
    616 void
    617 uvm_unloan(void *v, int npages, int flags)
    618 {
    619 
    620 	panic("%s: unimplemented", __func__);
    621 }
    622 
    623 int
    624 uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    625 	struct vm_page **opp)
    626 {
    627 
    628 	return EBUSY;
    629 }
    630 
    631 struct vm_page *
    632 uvm_loanbreak(struct vm_page *pg)
    633 {
    634 
    635 	panic("%s: unimplemented", __func__);
    636 }
    637 
    638 int
    639 uvm_loanobj(struct uvm_object *uobj, struct uio *uio, int advice)
    640 {
    641 
    642 	return ENOTSUP;
    643 }
    644 
    645 void
    646 ubc_purge(struct uvm_object *uobj)
    647 {
    648 
    649 }
    650 
    651 vaddr_t
    652 uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz)
    653 {
    654 
    655 	return 0;
    656 }
    657 
    658 int
    659 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
    660 	vm_prot_t prot, bool set_max)
    661 {
    662 
    663 	return EOPNOTSUPP;
    664 }
    665 
    666 /*
    667  * UVM km
    668  */
    669 
    670 vaddr_t
    671 uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
    672 {
    673 	void *rv, *desired = NULL;
    674 	int alignbit, error;
    675 
    676 #ifdef __x86_64__
    677 	/*
    678 	 * On amd64, allocate all module memory from the lowest 2GB.
    679 	 * This is because NetBSD kernel modules are compiled
    680 	 * with -mcmodel=kernel and reserve only 4 bytes for
    681 	 * offsets.  If we load code compiled with -mcmodel=kernel
    682 	 * anywhere except the lowest or highest 2GB, it will not
    683 	 * work.  Since userspace does not have access to the highest
    684 	 * 2GB, use the lowest 2GB.
    685 	 *
    686 	 * Note: this assumes the rump kernel resides in
    687 	 * the lowest 2GB as well.
    688 	 *
     689 	 * Note2: yes, it's a quick hack, but since this is the only
    690 	 * place where we care about the map we're allocating from,
    691 	 * just use a simple "if" instead of coming up with a fancy
    692 	 * generic solution.
    693 	 */
    694 	if (map == module_map) {
    695 		desired = (void *)(0x80000000 - size);
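         		/* i.e. ask for a mapping that ends exactly at the 2GB boundary */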
    696 	}
    697 #endif
    698 
    699 	if (__predict_false(map == module_map)) {
    700 		alignbit = 0;
    701 		if (align) {
    702 			alignbit = ffs(align)-1;
    703 		}
    704 		error = rumpuser_anonmmap(desired, size, alignbit,
    705 		    flags & UVM_KMF_EXEC, &rv);
    706 	} else {
    707 		error = rumpuser_malloc(size, align, &rv);
    708 	}
    709 
    710 	if (error) {
    711 		if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT))
    712 			return 0;
    713 		else
    714 			panic("uvm_km_alloc failed");
    715 	}
    716 
    717 	if (flags & UVM_KMF_ZERO)
    718 		memset(rv, 0, size);
    719 
    720 	return (vaddr_t)rv;
    721 }
    722 
    723 void
    724 uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
    725 {
    726 
    727 	if (__predict_false(map == module_map))
    728 		rumpuser_unmap((void *)vaddr, size);
    729 	else
    730 		rumpuser_free((void *)vaddr, size);
    731 }
    732 
    733 struct vm_map *
    734 uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
    735 	vsize_t size, int pageable, bool fixed, struct vm_map *submap)
    736 {
    737 
    738 	return (struct vm_map *)417416;
    739 }
    740 
    741 int
    742 uvm_km_kmem_alloc(vmem_t *vm, vmem_size_t size, vm_flag_t flags,
    743     vmem_addr_t *addr)
    744 {
    745 	vaddr_t va;
    746 	va = (vaddr_t)rump_hypermalloc(size, PAGE_SIZE,
    747 	    (flags & VM_SLEEP), "kmalloc");
    748 
    749 	if (va) {
    750 		*addr = va;
    751 		return 0;
    752 	} else {
    753 		return ENOMEM;
    754 	}
    755 }
    756 
    757 void
    758 uvm_km_kmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
    759 {
    760 
    761 	rump_hyperfree((void *)addr, size);
    762 }
    763 
    764 /*
    765  * VM space locking routines.  We don't really have to do anything,
    766  * since the pages are always "wired" (both local and remote processes).
    767  */
    768 int
    769 uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access)
    770 {
    771 
    772 	return 0;
    773 }
    774 
    775 void
    776 uvm_vsunlock(struct vmspace *vs, void *addr, size_t len)
    777 {
    778 
    779 }
    780 
    781 /*
    782  * For the local case the buffer mappers don't need to do anything.
    783  * For the remote case we need to reserve space and copy data in or
    784  * out, depending on B_READ/B_WRITE.
    785  */
    786 int
    787 vmapbuf(struct buf *bp, vsize_t len)
    788 {
    789 	int error = 0;
    790 
    791 	bp->b_saveaddr = bp->b_data;
    792 
    793 	/* remote case */
    794 	if (!RUMP_LOCALPROC_P(curproc)) {
    795 		bp->b_data = rump_hypermalloc(len, 0, true, "vmapbuf");
    796 		if (BUF_ISWRITE(bp)) {
    797 			error = copyin(bp->b_saveaddr, bp->b_data, len);
    798 			if (error) {
    799 				rump_hyperfree(bp->b_data, len);
    800 				bp->b_data = bp->b_saveaddr;
    801 				bp->b_saveaddr = 0;
    802 			}
    803 		}
    804 	}
    805 
    806 	return error;
    807 }
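         /*
          * b_saveaddr keeps the client's original buffer address so that
          * vunmapbuf() below can copy read data back out and restore b_data.
          */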
    808 
    809 void
    810 vunmapbuf(struct buf *bp, vsize_t len)
    811 {
    812 
    813 	/* remote case */
    814 	if (!RUMP_LOCALPROC_P(bp->b_proc)) {
    815 		if (BUF_ISREAD(bp)) {
    816 			bp->b_error = copyout_proc(bp->b_proc,
    817 			    bp->b_data, bp->b_saveaddr, len);
    818 		}
    819 		rump_hyperfree(bp->b_data, len);
    820 	}
    821 
    822 	bp->b_data = bp->b_saveaddr;
    823 	bp->b_saveaddr = 0;
    824 }
    825 
    826 void
    827 uvmspace_addref(struct vmspace *vm)
    828 {
    829 
    830 	/*
    831 	 * No dynamically allocated vmspaces exist.
    832 	 */
    833 }
    834 
    835 void
    836 uvmspace_free(struct vmspace *vm)
    837 {
    838 
    839 	/* nothing for now */
    840 }
    841 
    842 /*
    843  * page life cycle stuff.  it really doesn't exist, so just stubs.
    844  */
    845 
    846 void
    847 uvm_pageactivate(struct vm_page *pg)
    848 {
    849 
    850 	/* nada */
    851 }
    852 
    853 void
    854 uvm_pagedeactivate(struct vm_page *pg)
    855 {
    856 
    857 	/* nada */
    858 }
    859 
    860 void
    861 uvm_pagedequeue(struct vm_page *pg)
    862 {
    863 
     864 	/* nada */
    865 }
    866 
    867 void
    868 uvm_pageenqueue(struct vm_page *pg)
    869 {
    870 
    871 	/* nada */
    872 }
    873 
    874 void
    875 uvmpdpol_anfree(struct vm_anon *an)
    876 {
    877 
    878 	/* nada */
    879 }
    880 
    881 /*
    882  * Physical address accessors.
    883  */
    884 
    885 struct vm_page *
    886 uvm_phys_to_vm_page(paddr_t pa)
    887 {
    888 
    889 	return NULL;
    890 }
    891 
    892 paddr_t
    893 uvm_vm_page_to_phys(const struct vm_page *pg)
    894 {
    895 
    896 	return 0;
    897 }
    898 
    899 vaddr_t
    900 uvm_uarea_alloc(void)
    901 {
    902 
    903 	/* non-zero */
    904 	return (vaddr_t)11;
    905 }
    906 
    907 void
    908 uvm_uarea_free(vaddr_t uarea)
    909 {
    910 
    911 	/* nata, so creamy */
    912 }
    913 
    914 /*
    915  * Routines related to the Page Baroness.
    916  */
    917 
    918 void
    919 uvm_wait(const char *msg)
    920 {
    921 
    922 	if (__predict_false(rump_threads == 0))
    923 		panic("pagedaemon missing (RUMP_THREADS = 0)");
    924 
    925 	if (curlwp == uvm.pagedaemon_lwp) {
    926 		/* is it possible for us to later get memory? */
    927 		if (!uvmexp.paging)
    928 			panic("pagedaemon out of memory");
    929 	}
    930 
    931 	mutex_enter(&pdaemonmtx);
    932 	pdaemon_waiters++;
    933 	cv_signal(&pdaemoncv);
    934 	cv_wait(&oomwait, &pdaemonmtx);
    935 	mutex_exit(&pdaemonmtx);
    936 }
    937 
    938 void
    939 uvm_pageout_start(int npages)
    940 {
    941 
    942 	mutex_enter(&pdaemonmtx);
    943 	uvmexp.paging += npages;
    944 	mutex_exit(&pdaemonmtx);
    945 }
    946 
    947 void
    948 uvm_pageout_done(int npages)
    949 {
    950 
    951 	if (!npages)
    952 		return;
    953 
    954 	mutex_enter(&pdaemonmtx);
    955 	KASSERT(uvmexp.paging >= npages);
    956 	uvmexp.paging -= npages;
    957 
    958 	if (pdaemon_waiters) {
    959 		pdaemon_waiters = 0;
    960 		cv_broadcast(&oomwait);
    961 	}
    962 	mutex_exit(&pdaemonmtx);
    963 }
    964 
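         /*
          * Try to flush out a single page via its object's pgo_put().
          * Returns true if the page was handed to the pager, in which case
          * uvm_pageqlock has been released; false otherwise.  If the object
          * lock could not be taken and its holder is currently running on
          * another virtual CPU, *lockrunning is set so that the caller can
          * yield the host CPU and retry.
          */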
    965 static bool
    966 processpage(struct vm_page *pg, bool *lockrunning)
    967 {
    968 	struct uvm_object *uobj;
    969 
    970 	uobj = pg->uobject;
    971 	if (mutex_tryenter(uobj->vmobjlock)) {
    972 		if ((pg->flags & PG_BUSY) == 0) {
    973 			mutex_exit(&uvm_pageqlock);
    974 			uobj->pgops->pgo_put(uobj, pg->offset,
    975 			    pg->offset + PAGE_SIZE,
    976 			    PGO_CLEANIT|PGO_FREE);
    977 			KASSERT(!mutex_owned(uobj->vmobjlock));
    978 			return true;
    979 		} else {
    980 			mutex_exit(uobj->vmobjlock);
    981 		}
    982 	} else if (*lockrunning == false && ncpu > 1) {
    983 		CPU_INFO_ITERATOR cii;
    984 		struct cpu_info *ci;
    985 		struct lwp *l;
    986 
    987 		l = mutex_owner(uobj->vmobjlock);
    988 		for (CPU_INFO_FOREACH(cii, ci)) {
    989 			if (ci->ci_curlwp == l) {
    990 				*lockrunning = true;
    991 				break;
    992 			}
    993 		}
    994 	}
    995 
    996 	return false;
    997 }
    998 
    999 /*
   1000  * The Diabolical pageDaemon Director (DDD).
   1001  *
   1002  * This routine can always use better heuristics.
   1003  */
   1004 void
   1005 uvm_pageout(void *arg)
   1006 {
   1007 	struct vm_page *pg;
   1008 	struct pool *pp, *pp_first;
   1009 	int cleaned, skip, skipped;
   1010 	bool succ;
   1011 	bool lockrunning;
   1012 
   1013 	mutex_enter(&pdaemonmtx);
   1014 	for (;;) {
   1015 		if (!NEED_PAGEDAEMON()) {
   1016 			kernel_map->flags &= ~VM_MAP_WANTVA;
   1017 		}
   1018 
   1019 		if (pdaemon_waiters) {
   1020 			pdaemon_waiters = 0;
   1021 			cv_broadcast(&oomwait);
   1022 		}
   1023 
   1024 		cv_wait(&pdaemoncv, &pdaemonmtx);
   1025 		uvmexp.pdwoke++;
   1026 
   1027 		/* tell the world that we are hungry */
   1028 		kernel_map->flags |= VM_MAP_WANTVA;
   1029 		mutex_exit(&pdaemonmtx);
   1030 
   1031 		/*
   1032 		 * step one: reclaim the page cache.  this should give
   1033 		 * us the biggest earnings since whole pages are released
   1034 		 * into backing memory.
   1035 		 */
   1036 		pool_cache_reclaim(&pagecache);
   1037 		if (!NEED_PAGEDAEMON()) {
   1038 			mutex_enter(&pdaemonmtx);
   1039 			continue;
   1040 		}
   1041 
   1042 		/*
   1043 		 * Ok, so that didn't help.  Next, try to hunt memory
   1044 		 * by pushing out vnode pages.  The pages might contain
   1045 		 * useful cached data, but we need the memory.
   1046 		 */
   1047 		cleaned = 0;
   1048 		skip = 0;
   1049 		lockrunning = false;
   1050  again:
   1051 		mutex_enter(&uvm_pageqlock);
   1052 		while (cleaned < PAGEDAEMON_OBJCHUNK) {
   1053 			skipped = 0;
   1054 			TAILQ_FOREACH(pg, &vmpage_lruqueue, pageq.queue) {
   1055 
   1056 				/*
   1057 				 * skip over pages we _might_ have tried
   1058 				 * to handle earlier.  they might not be
   1059 				 * exactly the same ones, but I'm not too
   1060 				 * concerned.
   1061 				 */
   1062 				while (skipped++ < skip)
   1063 					continue;
   1064 
   1065 				if (processpage(pg, &lockrunning)) {
   1066 					cleaned++;
   1067 					goto again;
   1068 				}
   1069 
   1070 				skip++;
   1071 			}
   1072 			break;
   1073 		}
   1074 		mutex_exit(&uvm_pageqlock);
   1075 
   1076 		/*
   1077 		 * Ok, someone is running with an object lock held.
   1078 		 * We want to yield the host CPU to make sure the
   1079 		 * thread is not parked on the host.  Since sched_yield()
   1080 		 * doesn't appear to do anything on NetBSD, nanosleep
   1081 		 * for the smallest possible time and hope we're back in
   1082 		 * the game soon.
   1083 		 */
   1084 		if (cleaned == 0 && lockrunning) {
   1085 			rumpuser_clock_sleep(RUMPUSER_CLOCK_RELWALL, 0, 1);
   1086 
   1087 			lockrunning = false;
   1088 			skip = 0;
   1089 
   1090 			/* and here we go again */
   1091 			goto again;
   1092 		}
   1093 
   1094 		/*
   1095 		 * And of course we need to reclaim the page cache
   1096 		 * again to actually release memory.
   1097 		 */
   1098 		pool_cache_reclaim(&pagecache);
   1099 		if (!NEED_PAGEDAEMON()) {
   1100 			mutex_enter(&pdaemonmtx);
   1101 			continue;
   1102 		}
   1103 
   1104 		/*
   1105 		 * And then drain the pools.  Wipe them out ... all of them.
   1106 		 */
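         		/*
         		 * pool_drain() processes one pool per call and reports which
         		 * one it tried; stop as soon as a drain releases something, or
         		 * once we have come full circle back to the first pool tried.
         		 */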
   1107 		for (pp_first = NULL;;) {
   1108 			rump_vfs_drainbufs(10 /* XXX: estimate! */);
   1109 
   1110 			succ = pool_drain(&pp);
   1111 			if (succ || pp == pp_first)
   1112 				break;
   1113 
   1114 			if (pp_first == NULL)
   1115 				pp_first = pp;
   1116 		}
   1117 
   1118 		/*
   1119 		 * Need to use PYEC on our bag of tricks.
   1120 		 * Unfortunately, the wife just borrowed it.
   1121 		 */
   1122 
   1123 		mutex_enter(&pdaemonmtx);
   1124 		if (!succ && cleaned == 0 && pdaemon_waiters &&
   1125 		    uvmexp.paging == 0) {
   1126 			rumpuser_dprintf("pagedaemoness: failed to reclaim "
   1127 			    "memory ... sleeping (deadlock?)\n");
   1128 			cv_timedwait(&pdaemoncv, &pdaemonmtx, hz);
   1129 		}
   1130 	}
   1131 
   1132 	panic("you can swap out any time you like, but you can never leave");
   1133 }
   1134 
   1135 void
    1136 uvm_kick_pdaemon(void)
   1137 {
   1138 
   1139 	/*
   1140 	 * Wake up the diabolical pagedaemon director if we are over
   1141 	 * 90% of the memory limit.  This is a complete and utter
   1142 	 * stetson-harrison decision which you are allowed to finetune.
   1143 	 * Don't bother locking.  If we have some unflushed caches,
   1144 	 * other waker-uppers will deal with the issue.
   1145 	 */
   1146 	if (NEED_PAGEDAEMON()) {
   1147 		cv_signal(&pdaemoncv);
   1148 	}
   1149 }
   1150 
   1151 void *
   1152 rump_hypermalloc(size_t howmuch, int alignment, bool waitok, const char *wmsg)
   1153 {
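         	/*
         	 * The pagedaemon allocates against pdlimit, which still includes
         	 * the 2*MAXPHYS reserve carved out of rump_physmemlimit in
         	 * uvm_init(), so it can make progress even when everybody else
         	 * is already up against the normal limit.
         	 */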
   1154 	const unsigned long thelimit =
   1155 	    curlwp == uvm.pagedaemon_lwp ? pdlimit : rump_physmemlimit;
   1156 	unsigned long newmem;
   1157 	void *rv;
   1158 	int error;
   1159 
   1160 	uvm_kick_pdaemon(); /* ouch */
   1161 
   1162 	/* first we must be within the limit */
   1163  limitagain:
   1164 	if (thelimit != RUMPMEM_UNLIMITED) {
   1165 		newmem = atomic_add_long_nv(&curphysmem, howmuch);
   1166 		if (newmem > thelimit) {
   1167 			newmem = atomic_add_long_nv(&curphysmem, -howmuch);
   1168 			if (!waitok) {
   1169 				return NULL;
   1170 			}
   1171 			uvm_wait(wmsg);
   1172 			goto limitagain;
   1173 		}
   1174 	}
   1175 
   1176 	/* second, we must get something from the backend */
   1177  again:
   1178 	error = rumpuser_malloc(howmuch, alignment, &rv);
   1179 	if (__predict_false(error && waitok)) {
   1180 		uvm_wait(wmsg);
   1181 		goto again;
   1182 	}
   1183 
   1184 	return rv;
   1185 }
   1186 
   1187 void
   1188 rump_hyperfree(void *what, size_t size)
   1189 {
   1190 
   1191 	if (rump_physmemlimit != RUMPMEM_UNLIMITED) {
   1192 		atomic_add_long(&curphysmem, -size);
   1193 	}
   1194 	rumpuser_free(what, size);
   1195 }
   1196