Home | History | Annotate | Line # | Download | only in rumpkern
vm.c revision 1.66
      1 /*	$NetBSD: vm.c,v 1.66 2009/11/04 16:55:20 pooka Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
      5  *
      6  * Development of this software was supported by Google Summer of Code.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  *
     17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     18  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     20  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     23  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     27  * SUCH DAMAGE.
     28  */
     29 
     30 /*
     31  * Virtual memory emulation routines.  Contents:
     32  *  + anon objects & pager
     33  *  + misc support routines
     34  *  + kmem
     35  */
     36 
     37 /*
     38  * XXX: we abuse pg->uanon for the virtual address of the storage
     39  * for each page.  phys_addr would fit the job description better,
     40  * except that it will create unnecessary lossage on some platforms
     41  * due to not being a pointer type.
     42  */
     43 
     44 #include <sys/cdefs.h>
     45 __KERNEL_RCSID(0, "$NetBSD: vm.c,v 1.66 2009/11/04 16:55:20 pooka Exp $");
     46 
     47 #include <sys/param.h>
     48 #include <sys/atomic.h>
     49 #include <sys/null.h>
     50 #include <sys/vnode.h>
     51 #include <sys/buf.h>
     52 #include <sys/kmem.h>
     53 
     54 #include <machine/pmap.h>
     55 
     56 #include <rump/rumpuser.h>
     57 
     58 #include <uvm/uvm.h>
     59 #include <uvm/uvm_ddb.h>
     60 #include <uvm/uvm_prot.h>
     61 #include <uvm/uvm_readahead.h>
     62 
     63 #include "rump_private.h"
     64 
/*
 * Pager operations for anonymous memory objects.  The two handlers are
 * defined further down in this file.
 */
static int ao_get(struct uvm_object *, voff_t, struct vm_page **,
	int *, int, vm_prot_t, int, int);
static int ao_put(struct uvm_object *, voff_t, voff_t, int);

/* Ops vector that uao_create() installs into every object it creates. */
const struct uvm_pagerops aobj_pager = {
	.pgo_get = ao_get,
	.pgo_put = ao_put,
};
     73 
/* Page queue lock; initialized in rumpvm_init(). */
kmutex_t uvm_pageqlock;

/* Global UVM statistics and state referenced by the routines below. */
struct uvmexp uvmexp;
struct uvm uvm;

struct vmspace rump_vmspace;
struct vm_map rump_vmmap;
/* Storage behind kmem_map; the exported pointer refers to its embedded map. */
static struct vm_map_kernel kmem_map_store;
struct vm_map *kmem_map = &kmem_map_store.vmk_map;
/* Defined without an initializer, so all members are zero. */
const struct rb_tree_ops uvm_page_tree_ops;

/* Storage behind kernel_map; the exported pointer refers to its embedded map. */
static struct vm_map_kernel kernel_map_store;
struct vm_map *kernel_map = &kernel_map_store.vmk_map;
     87 
     88 /*
     89  * vm pages
     90  */
     91 
     92 /* called with the object locked */
     93 struct vm_page *
     94 rumpvm_makepage(struct uvm_object *uobj, voff_t off)
     95 {
     96 	struct vm_page *pg;
     97 
     98 	pg = kmem_zalloc(sizeof(struct vm_page), KM_SLEEP);
     99 	pg->offset = off;
    100 	pg->uobject = uobj;
    101 
    102 	pg->uanon = (void *)kmem_zalloc(PAGE_SIZE, KM_SLEEP);
    103 	pg->flags = PG_CLEAN|PG_BUSY|PG_FAKE;
    104 
    105 	TAILQ_INSERT_TAIL(&uobj->memq, pg, listq.queue);
    106 	uobj->uo_npages++;
    107 
    108 	return pg;
    109 }
    110 
    111 /* these are going away very soon */
    112 void rumpvm_enterva(vaddr_t addr, struct vm_page *pg) {}
    113 void rumpvm_flushva(struct uvm_object *uobj) {}
    114 
    115 struct vm_page *
    116 uvm_pagealloc_strat(struct uvm_object *uobj, voff_t off, struct vm_anon *anon,
    117 	int flags, int strat, int free_list)
    118 {
    119 
    120 	return rumpvm_makepage(uobj, off);
    121 }
    122 
    123 /*
    124  * Release a page.
    125  *
    126  * Called with the vm object locked.
    127  */
    128 void
    129 uvm_pagefree(struct vm_page *pg)
    130 {
    131 	struct uvm_object *uobj = pg->uobject;
    132 
    133 	if (pg->flags & PG_WANTED)
    134 		wakeup(pg);
    135 
    136 	uobj->uo_npages--;
    137 	TAILQ_REMOVE(&uobj->memq, pg, listq.queue);
    138 	kmem_free((void *)pg->uanon, PAGE_SIZE);
    139 	kmem_free(pg, sizeof(*pg));
    140 }
    141 
    142 void
    143 uvm_pagezero(struct vm_page *pg)
    144 {
    145 
    146 	pg->flags &= ~PG_CLEAN;
    147 	memset((void *)pg->uanon, 0, PAGE_SIZE);
    148 }
    149 
    150 /*
    151  * Anon object stuff
    152  */
    153 
    154 static int
    155 ao_get(struct uvm_object *uobj, voff_t off, struct vm_page **pgs,
    156 	int *npages, int centeridx, vm_prot_t access_type,
    157 	int advice, int flags)
    158 {
    159 	struct vm_page *pg;
    160 	int i;
    161 
    162 	if (centeridx)
    163 		panic("%s: centeridx != 0 not supported", __func__);
    164 
    165 	/* loop over pages */
    166 	off = trunc_page(off);
    167 	for (i = 0; i < *npages; i++) {
    168  retrylookup:
    169 		pg = uvm_pagelookup(uobj, off + (i << PAGE_SHIFT));
    170 		if (pg) {
    171 			if (pg->flags & PG_BUSY) {
    172 				pg->flags |= PG_WANTED;
    173 				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
    174 				    "aogetpg", 0);
    175 				goto retrylookup;
    176 			}
    177 			pg->flags |= PG_BUSY;
    178 			pgs[i] = pg;
    179 		} else {
    180 			pg = rumpvm_makepage(uobj, off + (i << PAGE_SHIFT));
    181 			pgs[i] = pg;
    182 		}
    183 	}
    184 	mutex_exit(&uobj->vmobjlock);
    185 
    186 	return 0;
    187 
    188 }
    189 
    190 static int
    191 ao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
    192 {
    193 	struct vm_page *pg;
    194 
    195 	/* we only free all pages for now */
    196 	if ((flags & PGO_FREE) == 0 || (flags & PGO_ALLPAGES) == 0) {
    197 		mutex_exit(&uobj->vmobjlock);
    198 		return 0;
    199 	}
    200 
    201 	while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL)
    202 		uvm_pagefree(pg);
    203 	mutex_exit(&uobj->vmobjlock);
    204 
    205 	return 0;
    206 }
    207 
    208 struct uvm_object *
    209 uao_create(vsize_t size, int flags)
    210 {
    211 	struct uvm_object *uobj;
    212 
    213 	uobj = kmem_zalloc(sizeof(struct uvm_object), KM_SLEEP);
    214 	uobj->pgops = &aobj_pager;
    215 	TAILQ_INIT(&uobj->memq);
    216 	mutex_init(&uobj->vmobjlock, MUTEX_DEFAULT, IPL_NONE);
    217 
    218 	return uobj;
    219 }
    220 
    221 void
    222 uao_detach(struct uvm_object *uobj)
    223 {
    224 
    225 	mutex_enter(&uobj->vmobjlock);
    226 	ao_put(uobj, 0, 0, PGO_ALLPAGES | PGO_FREE);
    227 	mutex_destroy(&uobj->vmobjlock);
    228 	kmem_free(uobj, sizeof(*uobj));
    229 }
    230 
    231 /*
    232  * Misc routines
    233  */
    234 
    235 static kmutex_t pagermtx;
    236 
    237 void
    238 rumpvm_init(void)
    239 {
    240 
    241 	uvmexp.free = 1024*1024; /* XXX */
    242 	uvm.pagedaemon_lwp = NULL; /* doesn't match curlwp */
    243 	rump_vmspace.vm_map.pmap = pmap_kernel();
    244 
    245 	mutex_init(&pagermtx, MUTEX_DEFAULT, 0);
    246 	mutex_init(&uvm_pageqlock, MUTEX_DEFAULT, 0);
    247 
    248 	kernel_map->pmap = pmap_kernel();
    249 	callback_head_init(&kernel_map_store.vmk_reclaim_callback, IPL_VM);
    250 	kmem_map->pmap = pmap_kernel();
    251 	callback_head_init(&kmem_map_store.vmk_reclaim_callback, IPL_VM);
    252 }
    253 
    254 
    255 
/*
 * Wire a page.  Nothing to do in this emulation.
 */
void
uvm_pagewire(struct vm_page *pg)
{

	(void)pg;
}
    262 
/*
 * Unwire a page.  Nothing to do in this emulation.
 */
void
uvm_pageunwire(struct vm_page *pg)
{

	(void)pg;
}
    269 
    270 int
    271 uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
    272 	vm_prot_t maxprot, int flags, void *handle, voff_t off, vsize_t locklim)
    273 {
    274 
    275 	panic("%s: unimplemented", __func__);
    276 }
    277 
/*
 * Bookkeeping for one pager "mapping" created by uvm_pagermapin() and
 * torn down by uvm_pagermapout().
 */
struct pagerinfo {
	vaddr_t pgr_kva;		/* start of the allocated window */
	int pgr_npages;			/* number of pages in the window */
	struct vm_page **pgr_pgs;	/* pages backing the window, in order */
	bool pgr_read;			/* UVMPAGER_MAPIN_READ was requested */

	LIST_ENTRY(pagerinfo) pgr_entries;	/* linkage on pagerlist */
};
/* All live windows; accesses in this file are made under pagermtx. */
static LIST_HEAD(, pagerinfo) pagerlist = LIST_HEAD_INITIALIZER(pagerlist);
    287 
    288 /*
    289  * Pager "map" in routine.  Instead of mapping, we allocate memory
    290  * and copy page contents there.  Not optimal or even strictly
    291  * correct (the caller might modify the page contents after mapping
    292  * them in), but what the heck.  Assumes UVMPAGER_MAPIN_WAITOK.
    293  */
    294 vaddr_t
    295 uvm_pagermapin(struct vm_page **pgs, int npages, int flags)
    296 {
    297 	struct pagerinfo *pgri;
    298 	vaddr_t curkva;
    299 	int i;
    300 
    301 	/* allocate structures */
    302 	pgri = kmem_alloc(sizeof(*pgri), KM_SLEEP);
    303 	pgri->pgr_kva = (vaddr_t)kmem_alloc(npages * PAGE_SIZE, KM_SLEEP);
    304 	pgri->pgr_npages = npages;
    305 	pgri->pgr_pgs = kmem_alloc(sizeof(struct vm_page *) * npages, KM_SLEEP);
    306 	pgri->pgr_read = (flags & UVMPAGER_MAPIN_READ) != 0;
    307 
    308 	/* copy contents to "mapped" memory */
    309 	for (i = 0, curkva = pgri->pgr_kva;
    310 	    i < npages;
    311 	    i++, curkva += PAGE_SIZE) {
    312 		/*
    313 		 * We need to copy the previous contents of the pages to
    314 		 * the window even if we are reading from the
    315 		 * device, since the device might not fill the contents of
    316 		 * the full mapped range and we will end up corrupting
    317 		 * data when we unmap the window.
    318 		 */
    319 		memcpy((void*)curkva, pgs[i]->uanon, PAGE_SIZE);
    320 		pgri->pgr_pgs[i] = pgs[i];
    321 	}
    322 
    323 	mutex_enter(&pagermtx);
    324 	LIST_INSERT_HEAD(&pagerlist, pgri, pgr_entries);
    325 	mutex_exit(&pagermtx);
    326 
    327 	return pgri->pgr_kva;
    328 }
    329 
    330 /*
    331  * map out the pager window.  return contents from VA to page storage
    332  * and free structures.
    333  *
    334  * Note: does not currently support partial frees
    335  */
    336 void
    337 uvm_pagermapout(vaddr_t kva, int npages)
    338 {
    339 	struct pagerinfo *pgri;
    340 	vaddr_t curkva;
    341 	int i;
    342 
    343 	mutex_enter(&pagermtx);
    344 	LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
    345 		if (pgri->pgr_kva == kva)
    346 			break;
    347 	}
    348 	KASSERT(pgri);
    349 	if (pgri->pgr_npages != npages)
    350 		panic("uvm_pagermapout: partial unmapping not supported");
    351 	LIST_REMOVE(pgri, pgr_entries);
    352 	mutex_exit(&pagermtx);
    353 
    354 	if (pgri->pgr_read) {
    355 		for (i = 0, curkva = pgri->pgr_kva;
    356 		    i < pgri->pgr_npages;
    357 		    i++, curkva += PAGE_SIZE) {
    358 			memcpy(pgri->pgr_pgs[i]->uanon,(void*)curkva,PAGE_SIZE);
    359 		}
    360 	}
    361 
    362 	kmem_free(pgri->pgr_pgs, npages * sizeof(struct vm_page *));
    363 	kmem_free((void*)pgri->pgr_kva, npages * PAGE_SIZE);
    364 	kmem_free(pgri, sizeof(*pgri));
    365 }
    366 
    367 /*
    368  * convert va in pager window to page structure.
    369  * XXX: how expensive is this (global lock, list traversal)?
    370  */
    371 struct vm_page *
    372 uvm_pageratop(vaddr_t va)
    373 {
    374 	struct pagerinfo *pgri;
    375 	struct vm_page *pg = NULL;
    376 	int i;
    377 
    378 	mutex_enter(&pagermtx);
    379 	LIST_FOREACH(pgri, &pagerlist, pgr_entries) {
    380 		if (pgri->pgr_kva <= va
    381 		    && va < pgri->pgr_kva + pgri->pgr_npages*PAGE_SIZE)
    382 			break;
    383 	}
    384 	if (pgri) {
    385 		i = (va - pgri->pgr_kva) >> PAGE_SHIFT;
    386 		pg = pgri->pgr_pgs[i];
    387 	}
    388 	mutex_exit(&pagermtx);
    389 
    390 	return pg;
    391 }
    392 
    393 /* Called with the vm object locked */
    394 struct vm_page *
    395 uvm_pagelookup(struct uvm_object *uobj, voff_t off)
    396 {
    397 	struct vm_page *pg;
    398 
    399 	TAILQ_FOREACH(pg, &uobj->memq, listq.queue) {
    400 		if (pg->offset == off) {
    401 			return pg;
    402 		}
    403 	}
    404 
    405 	return NULL;
    406 }
    407 
    408 void
    409 uvm_page_unbusy(struct vm_page **pgs, int npgs)
    410 {
    411 	struct vm_page *pg;
    412 	int i;
    413 
    414 	for (i = 0; i < npgs; i++) {
    415 		pg = pgs[i];
    416 		if (pg == NULL)
    417 			continue;
    418 
    419 		KASSERT(pg->flags & PG_BUSY);
    420 		if (pg->flags & PG_WANTED)
    421 			wakeup(pg);
    422 		if (pg->flags & PG_RELEASED)
    423 			uvm_pagefree(pg);
    424 		else
    425 			pg->flags &= ~(PG_WANTED|PG_BUSY);
    426 	}
    427 }
    428 
/*
 * Report the number of active and inactive pageable pages.  There is no
 * real accounting behind this; both figures are a fixed guess.
 */
void
uvm_estimatepageable(int *active, int *inactive)
{
	const int guess = 1024;	/* XXX: guessing game */

	*active = guess;
	*inactive = guess;
}
    437 
/*
 * Reinterpret a map pointer as a pointer to a kernel map.  This is a
 * plain cast; no checking is done.
 */
struct vm_map_kernel *
vm_map_to_kernel(struct vm_map *map)
{
	struct vm_map_kernel *kmap;

	kmap = (struct vm_map_kernel *)map;
	return kmap;
}
    444 
/*
 * Report whether the map is short on resources.  Always false here.
 */
bool
vm_map_starved_p(struct vm_map *map)
{
	const bool starved = false;

	(void)map;
	return starved;
}
    451 
    452 void
    453 uvm_pageout_start(int npages)
    454 {
    455 
    456 	uvmexp.paging += npages;
    457 }
    458 
    459 void
    460 uvm_pageout_done(int npages)
    461 {
    462 
    463 	uvmexp.paging -= npages;
    464 
    465 	/*
    466 	 * wake up either of pagedaemon or LWPs waiting for it.
    467 	 */
    468 
    469 	if (uvmexp.free <= uvmexp.reserve_kernel) {
    470 		wakeup(&uvm.pagedaemon);
    471 	} else {
    472 		wakeup(&uvmexp.free);
    473 	}
    474 }
    475 
    476 int
    477 uvm_loan(struct vm_map *map, vaddr_t start, vsize_t len, void *v, int flags)
    478 {
    479 
    480 	panic("%s: unimplemented", __func__);
    481 }
    482 
/*
 * Page loaning is not implemented here; calling this panics.
 */
void
uvm_unloan(void *v, int npages, int flags)
{

	/* not supported */
	panic("%s: unimplemented", __func__);
}
    489 
    490 int
    491 uvm_loanuobjpages(struct uvm_object *uobj, voff_t pgoff, int orignpages,
    492 	struct vm_page **opp)
    493 {
    494 
    495 	panic("%s: unimplemented", __func__);
    496 }
    497 
/*
 * Debug printer for a uvm object.  Prints nothing for now.
 */
void
uvm_object_printit(struct uvm_object *uobj, bool full,
	void (*pr)(const char *, ...))
{

	(void)uobj;
	(void)full;
	(void)pr;
}
    505 
    506 /*
    507  * Kmem
    508  */
    509 
    510 #ifndef RUMP_USE_REAL_ALLOCATORS
/*
 * Initialize the kmem allocator.  The allocator in this file keeps no
 * state of its own, so there is nothing to set up.
 *
 * Declared with an explicit (void) parameter list so that the definition
 * is a real prototype rather than an old-style declaration.
 */
void
kmem_init(void)
{

	/* nothing to do */
}
    517 
    518 void *
    519 kmem_alloc(size_t size, km_flag_t kmflag)
    520 {
    521 
    522 	return rumpuser_malloc(size, kmflag == KM_NOSLEEP);
    523 }
    524 
    525 void *
    526 kmem_zalloc(size_t size, km_flag_t kmflag)
    527 {
    528 	void *rv;
    529 
    530 	rv = kmem_alloc(size, kmflag);
    531 	if (rv)
    532 		memset(rv, 0, size);
    533 
    534 	return rv;
    535 }
    536 
/*
 * Release memory obtained from kmem_alloc()/kmem_zalloc().
 * The size argument is not needed by the underlying allocator.
 */
void
kmem_free(void *p, size_t size)
{

	(void)size;
	rumpuser_free(p);
}
    543 #endif /* RUMP_USE_REAL_ALLOCATORS */
    544 
    545 /*
    546  * UVM km
    547  */
    548 
    549 vaddr_t
    550 uvm_km_alloc(struct vm_map *map, vsize_t size, vsize_t align, uvm_flag_t flags)
    551 {
    552 	void *rv;
    553 	int alignbit, error;
    554 
    555 	alignbit = 0;
    556 	if (align) {
    557 		alignbit = ffs(align)-1;
    558 	}
    559 
    560 	rv = rumpuser_anonmmap(size, alignbit, flags & UVM_KMF_EXEC, &error);
    561 	if (rv == NULL) {
    562 		if (flags & (UVM_KMF_CANFAIL | UVM_KMF_NOWAIT))
    563 			return 0;
    564 		else
    565 			panic("uvm_km_alloc failed");
    566 	}
    567 
    568 	if (flags & UVM_KMF_ZERO)
    569 		memset(rv, 0, size);
    570 
    571 	return (vaddr_t)rv;
    572 }
    573 
    574 void
    575 uvm_km_free(struct vm_map *map, vaddr_t vaddr, vsize_t size, uvm_flag_t flags)
    576 {
    577 
    578 	rumpuser_unmap((void *)vaddr, size);
    579 }
    580 
    581 struct vm_map *
    582 uvm_km_suballoc(struct vm_map *map, vaddr_t *minaddr, vaddr_t *maxaddr,
    583 	vsize_t size, int pageable, bool fixed, struct vm_map_kernel *submap)
    584 {
    585 
    586 	return (struct vm_map *)417416;
    587 }
    588 
    589 vaddr_t
    590 uvm_km_alloc_poolpage(struct vm_map *map, bool waitok)
    591 {
    592 
    593 	return (vaddr_t)rumpuser_malloc(PAGE_SIZE, !waitok);
    594 }
    595 
    596 void
    597 uvm_km_free_poolpage(struct vm_map *map, vaddr_t addr)
    598 {
    599 
    600 	rumpuser_unmap((void *)addr, PAGE_SIZE);
    601 }
    602 
    603 vaddr_t
    604 uvm_km_alloc_poolpage_cache(struct vm_map *map, bool waitok)
    605 {
    606 	void *rv;
    607 	int error;
    608 
    609 	rv = rumpuser_anonmmap(PAGE_SIZE, PAGE_SHIFT, 0, &error);
    610 	if (rv == NULL && waitok)
    611 		panic("fixme: poolpage alloc failed");
    612 
    613 	return (vaddr_t)rv;
    614 }
    615 
    616 void
    617 uvm_km_free_poolpage_cache(struct vm_map *map, vaddr_t vaddr)
    618 {
    619 
    620 	rumpuser_unmap((void *)vaddr, PAGE_SIZE);
    621 }
    622 
    623 /*
    624  * Mapping and vm space locking routines.
    625  * XXX: these don't work for non-local vmspaces
    626  */
    627 int
    628 uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access)
    629 {
    630 
    631 	KASSERT(vs == &rump_vmspace);
    632 	return 0;
    633 }
    634 
    635 void
    636 uvm_vsunlock(struct vmspace *vs, void *addr, size_t len)
    637 {
    638 
    639 	KASSERT(vs == &rump_vmspace);
    640 }
    641 
    642 void
    643 vmapbuf(struct buf *bp, vsize_t len)
    644 {
    645 
    646 	bp->b_saveaddr = bp->b_data;
    647 }
    648 
    649 void
    650 vunmapbuf(struct buf *bp, vsize_t len)
    651 {
    652 
    653 	bp->b_data = bp->b_saveaddr;
    654 	bp->b_saveaddr = 0;
    655 }
    656 
/*
 * Wait for memory to become available.  There is nothing to wait for
 * in this emulation, so this returns immediately.
 */
void
uvm_wait(const char *msg)
{

	(void)msg;
}
    663 
/*
 * Release a vmspace.  Nothing is done for now.
 */
void
uvmspace_free(struct vmspace *vm)
{

	(void)vm;
}
    670 
    671 int
    672 uvm_io(struct vm_map *map, struct uio *uio)
    673 {
    674 
    675 	/*
    676 	 * just do direct uio for now.  but this needs some vmspace
    677 	 * olympics for rump_sysproxy.
    678 	 */
    679 	return uiomove((void *)(vaddr_t)uio->uio_offset, uio->uio_resid, uio);
    680 }
    681 
    682 /*
    683  * page life cycle stuff.  it really doesn't exist, so just stubs.
    684  */
    685 
/*
 * Page life cycle stub: activating a page does nothing here.
 */
void
uvm_pageactivate(struct vm_page *pg)
{

	(void)pg;
}
    692 
/*
 * Page life cycle stub: deactivating a page does nothing here.
 */
void
uvm_pagedeactivate(struct vm_page *pg)
{

	(void)pg;
}
    699 
/*
 * Page life cycle stub: dequeueing a page does nothing here.
 */
void
uvm_pagedequeue(struct vm_page *pg)
{

	(void)pg;
}
    706 
/*
 * Page life cycle stub: enqueueing a page does nothing here.
 */
void
uvm_pageenqueue(struct vm_page *pg)
{

	(void)pg;
}
    713